1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
| >>> import re
>>> QUERY = re.compile(r"Query= \S+(\w{2}).*len=(\d+)")
>>> from collections import OrderedDict
>>> data, query_id = OrderedDict(), None
>>> with open("query.txt", "r") as fh:
... for line in fh:
... m = QUERY.match(line)
... if m:
... query_id = m.groups()
... elif query_id and "," in line:
... item = line.split(",")[0]
... data.setdefault(query_id, []).append(item)
... elif query_id and ")" in line:
... item = line.split(")")[0] + ")"
... data.setdefault(query_id, []).append(item)
...
>>> from pprint import pprint
>>> pprint(data)
OrderedDict([(('c1', '6215'),
['gb|M19966.1|HSEGLY13A Equine herpesvirus type 1 (EHV-1)',
'dbj|AB618031.1| Escherichia coli (type 1 /strain RH2) DNA',
'>gb|M19966.1|HSEGLY13A Equine herpesvirus type 1 (EHV-1)']),
(('c2', '2860'),
['gb|AY206683.1| SEN virus strain SENV-H Orf2 gene',
'dbj|AB059353.1| SEN virus SENV-H gene',
'gb|KM593803.1| SEN virus strain HDMU-97 ORF1 gene',
'gb|KM593802.1| SEN virus strain HDMU-13 ORF1 gene',
'gb|AY183662.1| SEN virus H gene',
'gb|EU305675.1| Homo sapiens isolate LTT7 ORF1 gene',
'dbj|AB024373.2| Mus musculus DNA for ORF2',
'dbj|AB024379.1| Mus musculus DNA for ORF2',
'gb|AF464086.1| Mus musculus isolate BD 493-06 ORF2 mRNA',
'dbj|AB038620.1| Mus musculus complete genome',
'gb|DQ186997.1| Homo sapiens clone BM1A-09 ORF2 (ORF2)',
'dbj|AB028669.1| Mus musculus gene for ORF1 and ORF2',
'gb|DQ186998.1| Homo sapiens clone BM1A-13 ORF2 (ORF2)',
'gb|DQ186996.1| Homo sapiens clone BM1A-02 ORF2 (ORF2)',
'dbj|AB025946.2| Homo sapiens 19 DNA',
'emb|FR751493.1| Homo sapiens complete genome',
'dbj|AB038619.1| Mus musculus complete genome',
'emb|FR751480.1| Homo sapiens complete genome',
'emb|FR751476.1| Homo sapiens complete genome',
'emb|FR751479.1| Homo sapiens complete genome',
'emb|AJ620226.1| Homo sapiens',
'dbj|AB064607.1| Homo sapiens 10 DNA']),
(('c3', '815'),
['gb|M19966.1|HSEGLY13A Equine herpesvirus type 1 (EHV-1)']),
(('c4', '65'),
['gb|M19966.1|HSEGLY13A Equine herpesvirus type 1 (EHV-1)',
'>emb|AJ315436.1| Mus musculus DNA',
'>dbj|AB054648.1| Homo sapiens DNA',
'>dbj|AB028300.1| Mus musculus DNA',
'>dbj|AB017911.1| Mus musculus DNA',
'>gb|AF129887.1|AF129887 Mus musculus TTVCHN2'])])
>>> |
Partager