2011年12月30日星期五

ID To Namelist


ID To Namelist

import os
import time, sys
from Bio import SeqIO

# Annotate BLAST tabular hits with the database file that each subject ID
# comes from.
#
# Workflow (see main()):
#   1. ask for the BLAST output file and the database directory;
#   2. empty (or create) the 'Results' directory;
#   3. parse every file under the database directory as FASTA and remember
#      which record IDs each file contains;
#   4. write that table to Results/ID_datebase.txt;
#   5. copy every BLAST line whose 2nd column is one of those IDs into
#      Results/<name>.txt, with the name appended as an extra column.
#
# The code runs unchanged on Python 2 and Python 3.

RESULTS_DIR = 'Results'
# A progress message is printed each time this many input lines were read.
PROGRESS_INTERVAL = 100000


def read_line(prompt):
    """Show *prompt* on the console and return the line typed by the user."""
    try:
        reader = raw_input
    except NameError:
        # Python 3 has no raw_input(); its input() behaves the same way.
        reader = input
    return reader(prompt)


def prepare_results_dir(path):
    """Create *path*, or delete every file found under it if it exists.

    Sub-directories of an existing *path* are kept; only files are removed.
    """
    if os.path.exists(path):
        for root, _dirs, files in os.walk(path):
            for name in files:
                os.remove(os.path.join(root, name))
    else:
        os.mkdir(path)


def collect_database_ids(database_dir):
    """Return {'<parent dir>-<file name>': [record IDs]} for *database_dir*.

    Every file found while walking *database_dir* is parsed as FASTA with
    Bio.SeqIO. The key combines the name of the directory holding the file
    with the file name; os.path is used instead of splitting on a literal
    backslash so the key is also well defined on non-Windows systems.
    """
    names = {}
    for root, _dirs, files in os.walk(database_dir):
        # normpath() drops a trailing separator so basename() is not empty.
        parent = os.path.basename(os.path.normpath(root))
        for file_name in files:
            fasta_path = os.path.join(root, file_name)
            names[parent + '-' + file_name] = [
                record.id for record in SeqIO.parse(fasta_path, 'fasta')
            ]
    return names


def write_id_table(names, table_path):
    """Write the name -> ID list table to *table_path*, sorted by name."""
    with open(table_path, 'w') as table:
        table.write('Namelist' + '\t' + 'Corresponding ID' + '\n')
        for name in sorted(names):
            table.write(str(name) + '\t' + str(names[name]) + '\n')


def build_id_index(names):
    """Invert {name: [ids]} into {id: [names]} for exact ID lookup.

    Names are visited in sorted order, so each list of names is sorted and
    holds a name only once even when a file repeats an ID.
    """
    index = {}
    for name in sorted(names):
        for seq_id in names[name]:
            owners = index.setdefault(seq_id, [])
            if not owners or owners[-1] != name:
                owners.append(name)
    return index


def names_for_hit(line, id_index):
    """Return the names whose ID list contains the 2nd tab column of *line*.

    The column is compared for equality (after stripping whitespace), so an
    ID such as 'seq1' is not confused with 'seq10'. Lines with fewer than
    two tab-separated columns (blank or comment lines) match nothing.
    """
    columns = line.split('\t')
    if len(columns) < 2:
        return []
    return id_index.get(columns[1].strip(), [])


def annotate_hits(lines, id_index, results_dir):
    """Append each matching line of *lines* to <results_dir>/<name>.txt.

    The written line is the stripped input line followed by a tab, the
    name, another tab and a newline. Returns the number of lines read.
    """
    processed = 0
    for line in lines:
        processed += 1
        for name in names_for_hit(line, id_index):
            # Append mode: a name collects all of its hits in one file.
            # The file is closed right away so handles never pile up.
            out_path = os.path.join(results_dir, name + '.txt')
            with open(out_path, 'a') as out:
                out.write(line.strip() + '\t' + name + '\t' + '\n')
        if processed % PROGRESS_INTERVAL == 0:
            print(str(processed) + " sequences have been processed!")
    return processed


def main():
    """Run the interactive workflow described at the top of this section."""
    filename = read_line("Pls enter the name of the output txt file of Blastall.exe: ")
    approach = read_line("Pls enter the the approach of your Datebase: ")

    # Timing starts once the user has answered both prompts.
    start = time.time()

    prepare_results_dir(RESULTS_DIR)

    # The BLAST file is opened before the database scan so that a wrong
    # file name is reported immediately.
    with open(filename, 'r') as blast_file:
        names = collect_database_ids(approach)
        write_id_table(names, os.path.join(RESULTS_DIR, 'ID_datebase.txt'))
        annotate_hits(blast_file, build_id_index(names), RESULTS_DIR)

    end = time.time()
    sys.stderr.write("OK, All Work Finished in %3.2f secs\n" % (end - start))
    read_line("Press <Enter> to close this window: ")


if __name__ == '__main__':
    main()