#!/usr/bin/env python
import sys
import xml.etree.ElementTree as ET

try:  # Python 2
    import urllib
    import urllib2
    from urllib import urlencode
    from urllib2 import Request, urlopen
except ImportError:  # Python 3: the same helpers live in urllib.parse / urllib.request
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen
# Base URL of the NCBI E-utilities endpoints; the script appends the name of
# the individual CGI (esearch.fcgi, esummary.fcgi) to it.
# HTTPS is used because NCBI serves E-utilities over HTTPS only; relying on an
# HTTP->HTTPS redirect is unsafe for POST requests (the body can be dropped).
eutils_base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
def get_ids(taxid):
    """Collect the 'Caption' value of every nucleotide record of a taxon.

    Pages through ESearch results for ``txid<taxid>[Organism:exp]`` in the
    "nucleotide" database, and for each page of UIDs asks ESummary for the
    document summaries, gathering the text of every ``Item`` element whose
    ``Name`` attribute is ``Caption``.

    Args:
        taxid: taxonomy id, interpolated into the search term with ``%s``.

    Returns:
        A list with the collected Caption strings (empty if nothing matched).

    Errors raised by the ``esearch`` call itself propagate to the caller.
    A page whose summaries cannot be fetched or parsed is reported on stdout
    and skipped, so the result may be partial.
    """
    accession_numbers = []
    retstart = 0
    iteration_step = 10000  # page size requested from ESearch / ESummary
    while True:
        search_xml = esearch(db="nucleotide",
                             term="txid%s[Organism:exp]" % taxid,
                             retstart=retstart,
                             retmax=iteration_step)
        try:
            id_list = ET.fromstring(search_xml).find('IdList')
        except Exception as e:
            # Without a parseable search page we cannot tell whether more
            # pages exist; stop instead of requesting pages forever.
            print(e)
            break

        if id_list is None:
            break
        ids = [node.text for node in id_list.findall('Id')]
        if not ids:
            # An empty IdList means we have paged past the last result.
            # Stopping here also avoids calling ESummary with no ids.
            break

        try:
            summary_xml = esummary(db="nucleotide", ids=ids,
                                   retmax=iteration_step)
            summary = ET.fromstring(summary_xml)
            for docsum in summary.findall('DocSum'):
                for item in docsum.findall("Item[@Name='Caption']"):
                    accession_numbers.append(item.text)
        except Exception as e:
            # Best effort: report the failed page and carry on with the next.
            print(e)

        retstart += iteration_step
    return accession_numbers
def esearch(db, term, retstart=0, retmax=20):
    """Send an ESearch request (HTTP GET) and return the raw response body.

    Args:
        db: name of the Entrez database to search.
        term: search term; it is percent-encoded before being sent.
        retstart: integer index of the first result to return.
        retmax: integer maximum number of results to return.

    Returns:
        The response body exactly as read from the connection (``str`` on
        Python 2, ``bytes`` on Python 3); ``ET.fromstring`` accepts both.

    Raises:
        Whatever ``urlopen`` raises for connection or HTTP errors.
    """
    # urlencode escapes characters such as the brackets, colon and spaces of
    # "txid4754[Organism:exp]", which are not valid raw in a query string.
    # "%i" keeps the original integer formatting of retstart / retmax.
    query = urlencode([
        ('db', db),
        ('term', term),
        ('retstart', "%i" % retstart),
        ('retmax', "%i" % retmax),
    ])
    response = urlopen("%sesearch.fcgi?%s" % (eutils_base_url, query))
    try:
        return response.read()
    finally:
        # Close the connection even if read() fails.
        response.close()
def esummary(db, ids, retstart=0, retmax=20):
    """Send an ESummary request (HTTP POST) and return the raw response body.

    Args:
        db: name of the Entrez database the ids belong to.
        ids: iterable of UID strings; they are sent comma-joined as ``id``.
        retstart: accepted for signature compatibility; not sent to the server.
        retmax: accepted for signature compatibility; not sent to the server.

    Returns:
        The response body exactly as read from the connection (``str`` on
        Python 2, ``bytes`` on Python 3); ``ET.fromstring`` accepts both.

    Raises:
        Whatever ``urlopen`` raises for connection or HTTP errors.
    """
    payload = urlencode({
        'db': db,
        'id': ','.join(ids),
    })
    # Supplying a body makes this a POST, which keeps long id lists out of the
    # URL. The urlencoded text is pure ASCII; encoding it yields the bytes
    # Python 3 requires and leaves a Python 2 str unchanged.
    req = Request("%sesummary.fcgi" % eutils_base_url, payload.encode('ascii'))
    response = urlopen(req)
    try:
        return response.read()
    finally:
        # Close the connection even if read() fails.
        response.close()
if __name__ == '__main__':
    # Command-line entry point: read the taxonomy id that follows "-id",
    # print the list returned by get_ids() and then how many entries it has.
    taxid = None
    if "-id" in sys.argv:
        value_pos = sys.argv.index("-id") + 1
        # "-id" given as the very last argument has no value; fall through to
        # the usage message instead of failing with an IndexError.
        if value_pos < len(sys.argv):
            taxid = sys.argv[value_pos]
    if not taxid:
        print("Usage: taxid_2_gbids.py -id taxid")
        print("Example: taxid_2_gbids.py -id 4754")
        sys.exit(-1)
    ids = get_ids(taxid)
    print(ids)
    print("%i ids found..." % len(ids))
0 comments:
Post a Comment