
import os

# Number subtracted from a file's total line count to obtain the index of the
# data row whose first column supplies the per-file identifier.
# NOTE(review): presumably the count of '#' comment lines in each annotation
# file -- confirm against the tool that produces these files.
NON_DATA_LINES = 4

# Zero-based index of the tab-separated column holding the gene name.
GENE_COLUMN = 4


def summarize_annotations(path):
    """Collect gene names from one tab-separated annotation file.

    Lines containing ``#`` anywhere are skipped.  Every remaining line is
    counted as a data row, stripped, and split on tabs; the value in column
    ``GENE_COLUMN`` is kept when it is longer than one character.

    Args:
        path: Path of the annotation file to read.

    Returns:
        A ``(expected_rows, genes, sample_id)`` tuple where

        * ``expected_rows`` is the file's line count minus ``NON_DATA_LINES``,
        * ``genes`` is the list of collected gene names (duplicates kept),
        * ``sample_id`` is the first column of data row number
          ``expected_rows`` with its first five characters removed, or ``''``
          when the file has no such row.
    """
    # The context manager closes the file even if reading fails.
    with open(path, 'r') as handle:
        lines = handle.readlines()

    expected_rows = len(lines) - NON_DATA_LINES
    genes = []
    sample_id = ''
    row_count = 0
    for raw in lines:
        if '#' in raw:
            continue
        row_count += 1
        fields = raw.strip().split('\t')
        # Blank or truncated rows have no gene column; treat them as rows
        # without a gene instead of failing with IndexError.
        gene = fields[GENE_COLUMN] if len(fields) > GENE_COLUMN else ''
        if len(gene) > 1:
            genes.append(gene)
        if row_count == expected_rows:
            sample_id = fields[0][5:]
    return expected_rows, genes, sample_id


genes_all = []
orthologs = []
mgm_genes = ''

# List the directory once so the printed listing matches what is processed.
entries = os.listdir('.')
print(entries)
for plik in entries:
    if '.annotations' in plik and '#' not in plik:
        # mgm_genes is reassigned for every file, so a file without a matching
        # row reports '' rather than the previous file's identifier.
        dlugosc_pliku, genes, mgm_genes = summarize_annotations(plik)
        genes_all.extend(genes)
        print(plik, mgm_genes, 'othologs', dlugosc_pliku, len(genes), len(set(genes)))

# Totals across all files: every collected gene, then distinct genes.
print(len(genes_all))
print(len(set(genes_all)))
