| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 
 | import optparse, sys
 
splitchar1 = '\t'
splitchar2 = ' '
 
# For brat, overlapping annotations are not permitted (or at least a warning is
# generated), so we can simplify the sort by ordering on begin alone.  That is
# probably a good idea anyway.
class AnnotationRecord:
    """One annotation line of a .ann file, held as its individual fields.

    The string form produced by __repr__ is
    label, splitchar1, type, splitchar2, begin, splitchar2, end, splitchar1, text.
    """
    # Class-level defaults; instances normally get their own values assigned.
    label = 'T0'
    type = ''
    begin = -1
    end = -1
    text = ''

    def __repr__(self):
        # The middle field groups type and offsets with the inner separator,
        # the three fields are then glued with the outer separator.
        span_field = splitchar2.join([self.type, str(self.begin), str(self.end)])
        return splitchar1.join([self.label, span_field, self.text])
 
def create_record(parts):
    """Build an AnnotationRecord from the tab-split fields of one .ann line.

    parts -- list of fields: [label, 'type begin end', text, ...].

    Returns the populated AnnotationRecord.  begin, end and text are kept
    as the strings found in the input (text keeps whatever line ending the
    last field carried), so the record prints back unchanged.

    Raises IndexError when the label, span or text field is missing, or
    when the span field has fewer than three space-separated tokens.
    """
    if len(parts) < 3:
        raise IndexError('expected label, span and text fields, got %r' % (parts,))
    middle_parts = parts[1].split(splitchar2)
    if len(middle_parts) < 3:
        raise IndexError('expected type, begin and end, got %r' % (parts[1],))

    record = AnnotationRecord()
    record.label = parts[0]
    record.type = middle_parts[0]
    record.begin = middle_parts[1]
    # A discontinuous brat span ("0 5;16 23") has more than two offset
    # tokens; keep all of them instead of silently dropping the tail.
    record.end = splitchar2.join(middle_parts[2:])
    # Likewise keep text that itself contained the field separator, which
    # would otherwise be truncated at the first embedded separator.
    record.text = splitchar1.join(parts[2:])
    return record
 
def main(filename, out_filename):
    """Relabel the annotations of a .ann file in order of their begin offset.

    filename     -- path of the .ann file to read.
    out_filename -- path the relabelled annotations are written to.

    Each non-blank line is parsed with create_record; the records are
    sorted by integer begin offset, relabelled T1, T2, ... in that order
    and written out one per line.  Errors from open(), create_record() or
    int() on a malformed begin offset propagate to the caller.
    """
    annotation_records = []

    # 'with' guarantees the input is closed even if a malformed line raises.
    with open(filename, 'r') as fo:
        for line in fo:
            # Strip the line ending here so that every record is written
            # with exactly one newline below, even when the last input
            # line had none (it may no longer be last after sorting).
            line = line.rstrip('\r\n')
            if not line.strip():
                continue  # blank lines carry no annotation
            annotation_records.append(create_record(line.split(splitchar1)))

    # sort based upon begin; the whole input has been read before the
    # output is opened, so in-place rewriting of the same path still works
    sorted_annotation_records = sorted(annotation_records, key=lambda a: int(a.begin))

    # now relabel based upon the sorted order
    for label_value, sorted_record in enumerate(sorted_annotation_records, 1):
        sorted_record.label = 'T' + str(label_value)

    # now write the resulting file to disk
    with open(out_filename, 'w') as fo:
        for sorted_record in sorted_annotation_records:
            fo.write(repr(sorted_record) + '\n')
 
 
# Format of a .ann line is: T#<TAB>Type Start End<TAB>Text
# Command-line args are: input file, output file
if __name__ == '__main__':
    # Use the module docstring as usage text when there is one; otherwise
    # fall back to a usage line that names the two required arguments.
    usage = globals()['__doc__'] or '%prog [options] INPUT_FILE OUTPUT_FILE'
    parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), usage=usage, version='$Id$')
    # NOTE: --verbose is accepted but nothing below reads it.
    parser.add_option('-v', '--verbose', action='store_true', default=False, help='verbose output')
    (options, args) = parser.parse_args()
    if len(args) < 2:
        # parser.error prints the usage plus this message and exits non-zero
        parser.error('missing argument')
    main(args[0], args[1])
    sys.exit(0)
Partager