This file is indexed.

/usr/share/seqsero/libs/Otype_determine_analysis.py is in seqsero 1.0-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
#!/usr/bin/env python

'''
revised at 11/24/2013
note:
The .py file mainly designed to deal with 5 different situations: 1.the gnd and galF on same contig; 2.the two genes on different contigs;
3.the two genes on different contigs, and gnd gene is splitted by two contigs; 4.the two genes on different contigs, and galF gene is splitted by two contigs;
5.the two genes on different contigs, and the two genes are both splitted by two contigs respectively;

input:
1) <gnd_galF_sequence>: a fasta file containing galF and gnd sequences
2) <target>: a fasta file containing the genome (may be seperated contigs), mainly as the blast database for <gnd_galF_sequence> to extract rfb region
3) <database>: a fasta file containing rfb regions of 46 different O-serotypes

output: the region between the galF and gnd in the target(blatdb) sequebce (the rfb region, assuming the rfb region is flanked by the two genes)
synopsis: ./Otype_determine_analysis.py <gnd_galF_sequence> <target> <database>
'''
from __future__ import division
import sys
import os
from Bio.Blast import NCBIXML
from Bio import SeqIO


def test_O29(subdatabase):
  first_hspbit=[]
  first_alignmentlist=[]
  global Choice,Choice_score,O_9_score,O_2_score
  O_9_score=0
  O_2_score=0
  os.system('makeblastdb -in database/'+subdatabase+' -out '+subdatabase+'_db -dbtype nucl')###01/28/2015
  os.system('blastn -query '+target+' -db '+subdatabase+'_db -out '+subdatabase+'_vs_'+target+'.xml '+'-outfmt 5')###01/28/2015
  xml_file=subdatabase+'_vs_'+target+'.xml'
  result_handle=open(xml_file)
  blast_record=NCBIXML.parse(result_handle)
  records=list(blast_record)
  if subdatabase=="tyr_of_O2_O9.fasta":
    for record in records:    #there are many records (i.e. the '>' in query file), so change another method
      for alignment in record.alignments:
        if 'O-9' in alignment.hit_def:
          for hsp in alignment.hsps:
            if hsp.expect<E_thresh:
              O_9_score=O_9_score+hsp.bits
        elif 'O-2' in alignment.hit_def:
          for hsp in alignment.hsps:
            if hsp.expect<E_thresh:
              O_2_score=O_2_score+hsp.bits
    if O_9_score>100:
      if O_9_score>O_2_score:
        print '$$$ Most possible O_type: O-9','\n'
        print '$$$ longest_bit_score:',O_9_score,'\n'
      else:
        print '$$$ Most possible O_type: O-2','\n'
        print '$$$ longest_bit_score:',O_2_score,'\n'
    else:
      print "Assumpition wrong, no O2 or O9, return to re-analysis"
      if ('O-2_' not in Sec_Choice) and ('O-9_' not in Sec_Choice):
        print '$$$ Most possible O_type Choice (no tyr difference):',Sec_Choice
      if (('O-2_' in Sec_Choice) or ('O-9_' in Sec_Choice)) and ('O-2_' not in Third_Choice) and ('O-9_' not in Third_Choice):
          print '$$$ Most possible O_type Choice (no tyr difference):',Third_Choice
    os.system("rm tyr_of_O2_O9.fasta_db.*")###01/28/2015
    os.system("rm "+xml_file)###01/28/2015
  if subdatabase=="oafA_of_O4_O5.fasta":
    try:
      for record in records:    #there are many records (i.e. the '>' in query file), so change another method
        for alignment in record.alignments:
          if 'O-4_full' in alignment.hit_def:
            for hsp in alignment.hsps:
              if hsp.expect<E_thresh:
                O_9_score=O_9_score+hsp.bits
          elif 'O-4_5-' in alignment.hit_def:
            for hsp in alignment.hsps:
              if hsp.expect<E_thresh:
                O_2_score=O_2_score+hsp.bits
      if O_9_score>100:
        if O_9_score>O_2_score:
          print '$$$O5_none_7_base_deletion','\n'
          print '$$$ longest_bit_score:',O_9_score,'\n'
        else:
          print '$$$O5-','\n'
          print '$$$ longest_bit_score:',O_2_score,'\n'
      else:
        print '$$$O5_none_7_base_deletion,unsure','\n'
      os.system("rm oafA_of_O4_O5.fasta_db.*")###01/28/2015
      os.system("rm "+xml_file)###01/28/2015
    except:
      print "No oafA genes"
  if subdatabase=="O_3,10_and_1,3,19_spe.fasta":
    try:
      for record in records:    #there are many records (i.e. the '>' in query file), so change another method
        for alignment in record.alignments:
          if 'O_3,10_not_in_1,3,19' in alignment.hit_def:
            for hsp in alignment.hsps:
              if hsp.expect<E_thresh:
                O_9_score=O_9_score+hsp.bits
          elif 'O_1,3,19_not_in_3,10' in alignment.hit_def:
            for hsp in alignment.hsps:
              if hsp.expect<E_thresh:
                O_2_score=O_2_score+hsp.bits
      if O_9_score>200:
        if O_9_score>O_2_score:
          print '$$$O3,10 more possible','\n'
          print '$$$ longest_bit_score:',O_9_score,'\n'
      else:
        if O_2_score>100:
          print '$$$O1,3,19 more possible','\n'
          print '$$$ longest_bit_score:',O_2_score,'\n'
      os.system("rm O_3,10_and_1,3,19_spe.fasta_db.*")###01/28/2015
      os.system("rm "+xml_file)###01/28/2015
    except:
      print "No O3,10_and_O1,3,19 spe sequences"


def show_result():
        global First_Choice
        global Sec_Choice
        global Third_Choice
        hspbit=[]
        alignmentlist=[]
        for record in records:
          for alignment in record.alignments:
            hsp_bit_score=0
            startlist=[]
            endlist=[]
            for hsp in alignment.hsps:
              if hsp.expect<E_thresh:
                start=hsp.query_start
                end=hsp.query_end
                if len(startlist)==0:
                  startlist.append(start)
                  endlist.append(end)
                  hsp_bit_score=hsp_bit_score+hsp.bits
                elif len(startlist)==1:
                  if end<=startlist[0] or start>=endlist[0]:
                    startlist.append(start)
                    endlist.append(end)
                    hsp_bit_score=hsp_bit_score+hsp.bits
                  if start>=startlist[0] and end<=endlist[0]:
                    print hsp_bit_score,'Fully overlapped hsp, ignore',hsp_bit_score
                    continue
                  if start<startlist[0] and startlist[0]<end<endlist[0]:
                    print hsp_bit_score,'Partially overlapped hsp, trying to extract unoverlap part',hsp_bit_score
                    newend=startlist[0]-1
                    percentage=(newend-start)/(end-start)
                    startlist.append(start)
                    endlist.append(newend)
                    hsp_bit_score=hsp_bit_score+percentage*hsp.bits
                  if end>endlist[0] and startlist[0]<start<endlist[0]:
                    print hsp_bit_score,'Partially overlapped hsp, trying to extract unoverlap part',hsp_bit_score
                    newstart=endlist[0]+1
                    percentage=(end-newstart)/(end-start)
                    startlist.append(newstart)
                    endlist.append(end)
                    hsp_bit_score=hsp_bit_score+percentage*hsp.bits
                elif len(startlist)>1:
                  startlist.sort()
                  endlist.sort()
                  length=len(startlist)
                  if end<=startlist[0] or start>=endlist[length-1]:
                    startlist.append(start)
                    endlist.append(end)
                    hsp_bit_score=hsp_bit_score+hsp.bits
                  if start<startlist[0] and startlist[0]<end<endlist[0]:
                    print hsp_bit_score,'Partially overlapped hsp, trying to extract unoverlap part',hsp_bit_score
                    newend=startlist[0]-1
                    percentage=(newend-start)/(end-start)
                    startlist.append(start)
                    endlist.append(newend)
                    hsp_bit_score=hsp_bit_score+hsp.bits*percentage
                  if end>endlist[length-1] and startlist[length-1]<start<endlist[length-1]:
                    print hsp_bit_score,'Partially overlapped hsp, trying to extract unoverlap part',hsp_bit_score
                    newstart=endlist[length-1]+1
                    percentage=(end-newstart)/(end-start)
                    startlist.append(newstart)
                    endlist.append(end)
                    hsp_bit_score=hsp_bit_score+hsp.bits*percentage
                  else:
                    for i in range(0,length-1):         #use two i and i+1 as basic combinations
                      if (start>=startlist[i] and end<=endlist[i]) or (start>=startlist[i+1] and end<=endlist[i+1]):
                        print hsp_bit_score,'Fully overlapped hsp, ignore',hsp_bit_score
                        continue
                      if endlist[i]<=start<end<=startlist[i+1]:
                        startlist.append(start)
                        endlist.append(end)
                        hsp_bit_score=hsp_bit_score+hsp.bits
                      if startlist[i]<=start<=endlist[i] and endlist[i]<end<startlist[i+1]:
                        print hsp_bit_score,'Partially overlapped hsp, trying to extract unoverlap part',hsp_bit_score 
                        newstart=endlist[i]+1
                        percentage=(end-newstart)/(end-start)
                        startlist.append(newstart)
                        endlist.append(end)
                        hsp_bit_score=hsp_bit_score+hsp.bits*percentage
                      if endlist[i]<start<startlist[i+1] and startlist[i+1]<=end<=endlist[i+1]:
                        print hsp_bit_score,'Partially overlapped hsp, trying to extract unoverlap part',hsp_bit_score
                        newend=startlist[i+1]-1
                        percentage=(newend-start)/(end-start)
                        startlist.append(start)
                        endlist.append(newend)
                        hsp_bit_score=hsp_bit_score+hsp.bits*percentage
            alignment=alignment.hit_def
            hspbit.append(hsp_bit_score)
            alignmentlist.append(alignment)

        scorelist=dict(zip(alignmentlist,hspbit))
        score=0
        for Otype in scorelist:
          if scorelist[Otype]>score:
            First_Choice=Otype
            score=scorelist[Otype]
                
        secscore=0
        for Otype in scorelist:
          if scorelist[Otype]>secscore and Otype!=First_Choice:
            Sec_Choice=Otype
            secscore=scorelist[Otype] 
        
        thirdscore=0
        for Otype in scorelist:
          if scorelist[Otype]>thirdscore and Otype!=First_Choice and Otype!=Sec_Choice:
            Third_Choice=Otype
            thirdscore=scorelist[Otype]

        if thirdscore==0:
          names=First_Choice+Sec_Choice
        else:
          names=First_Choice+Sec_Choice+Third_Choice
        if secscore==0:
          names=First_Choice
        else:
          names=First_Choice+Sec_Choice
        if score==0:
          print "$$$ No O_type, due to no hit of rfb"
          names=""
        if 'O-2_' in names and 'O-9_' in names and ('O-2_' in First_Choice or 'O-9_' in First_Choice):
          print '#Contain O2 and O9, so change to special test'
          test_O29("tyr_of_O2_O9.fasta")
        else:
          if score>0:
            print '$$$ Most possible O_type: ',First_Choice,'\n'
            print '$$$ Most bit_score:',score,'\n'
            if "O-4_" in First_Choice:#$$$$$$$
              test_O29("oafA_of_O4_O5.fasta")#$$$$$$$
            if "O-1,3,19" in First_Choice or "O-3,10" in First_Choice:
              test_O29("O_3,10_and_1,3,19_spe.fasta")#$$$$$$$
          if secscore>0:
            print '$$$ Second possible O_type: ',Sec_Choice,'\n'
            print '$$$ Second bit_score:',secscore,'\n'
          if thirdscore>0:
            print '$$$ Third possible O_type: ',Third_Choice,'\n'
            print '$$$ Third bit_score:',thirdscore,'\n'







queries=sys.argv[1]
target=sys.argv[2]
database=sys.argv[3]
output=target.split('.')[0]+'_out.fa'
print "$$:",target


os.system('makeblastdb -in '+target+' -out '+target+'_db '+'-dbtype nucl')###01/28/2015
os.system('blastn -query '+queries+' -db '+target+'_db '+'-out '+queries+'_vs_'+target+'.xml '+'-outfmt 5')###01/28/2015, since it's abs address for "run_auto*.py", so no need to change "query" address this time
xml_file=queries+'_vs_'+target+'.xml'
print '\n'
result_handle=open(xml_file)
blast_record=NCBIXML.parse(result_handle)
blast_record=list(blast_record)
target_seq=SeqIO.parse(target,'fasta')
target_seq=list(target_seq)
#os.system("rm "+xml_file)###01/28/2015
#os.system("rm "+target+'_db.*')###01/28/2015

if len(blast_record)==2:
  print 'Hits have been got'+'\n'
  if len(blast_record[0].alignments)==1 and len(blast_record[1].alignments)==1:
    print 'Checking the number of alignments:  2 alignments obtained'+'\n'
    if len(blast_record[0].alignments[0].hsps)==1 and len(blast_record[1].alignments[0].hsps)==1:
      print 'Checking the number of hsps:  each alignment has 1 hsp'+'\n'
      if blast_record[0].alignments[0].hit_def==blast_record[1].alignments[0].hit_def:
        print 'Checking locations of hits:  Both hits are located in '+'"'+str(blast_record[0].alignments[0].hit_def)+'"'+'...'+'\n'
               
        hit_1_start=blast_record[0].alignments[0].hsps[0].sbjct_start
        hit_1_end=blast_record[0].alignments[0].hsps[0].sbjct_end

        hit_2_start=blast_record[1].alignments[0].hsps[0].sbjct_start
        hit_2_end=blast_record[1].alignments[0].hsps[0].sbjct_end

        if hit_1_start>hit_1_end:
          buffer=hit_1_start
          hit_1_start=hit_1_end
          hit_1_end=buffer

        if hit_2_start>hit_2_end:
          buffer=hit_2_start
          hit_2_start=hit_2_end
          hit_2_end=buffer

        print 'hit_1_start: '+str(hit_1_start)
        print 'hit_1_end: '+str(hit_1_end)

        print 'hit_2_start: '+str(hit_2_start)
        print 'hit_2_end: '+str(hit_2_end)

    
        if hit_1_end<hit_2_start:                    
          extract_start=hit_1_end+1                     
          extract_end=hit_2_start-1                        
        else:                                       
          extract_start=hit_2_end+1
          extract_end=hit_1_start-1
                  
        print 'start: '+str(extract_start), 'end: '+str(extract_end)+'\n'

        for contig in target_seq:
          if (contig.description==blast_record[0].alignments[0]) or (contig.description.replace(" ","")==blast_record[0].alignments[0].hit_def.replace(" ","")):
            target_contig=contig

        rfb_region=target_contig[extract_start:extract_end]
        print 'Extracted rfb region length:  '+str(len(rfb_region.seq.tostring()))+'\n'
        print 'Extracted rfb region saved in:  '+output+'\n'

        outfile=open(output,'w')
        title='>'+target.split('.')[0]+' rfb region:'+blast_record[0].alignments[0].hit_def+':'+str(extract_start)+' to '+str(extract_end)+'_'+str(len(rfb_region.seq.tostring()))+'bp'+')'
        outfile.write(title)  
        outfile.write('\n')
        outfile.write(rfb_region.seq.tostring())
        outfile.close()

               
        os.system('makeblastdb -in '+database+' -out '+database+'_db '+'-dbtype nucl')
        os.system('blastn -query '+output+' -db '+database+'_db '+'-out '+'Blast_Otype_'+target+'.xml '+'-outfmt 5')
        xml_file='Blast_Otype_'+target+'.xml'
        print '\n'


        filehandle=open(xml_file)
        records=NCBIXML.parse(filehandle)
        records=list(records)
        target_hspbit=0      #give a original value to target_hspbit
        second_hspbit=0
        third_hspbit=0
        E_thresh=1e-10
        show_result()

      else:
        print 'Checking locations of hits:  the two hits are not located in same contig......'+'\n'
        hit_1_start=blast_record[0].alignments[0].hsps[0].sbjct_start
        hit_1_end=blast_record[0].alignments[0].hsps[0].sbjct_end
        hit_2_start=blast_record[1].alignments[0].hsps[0].sbjct_start
        hit_2_end=blast_record[1].alignments[0].hsps[0].sbjct_end

        if hit_1_start>hit_1_end:
          buffer=hit_1_start
          hit_1_start=hit_1_end
          hit_1_end=buffer

        if hit_2_start>hit_2_end:
          buffer=hit_2_start
          hit_2_start=hit_2_end
          hit_2_end=buffer

        outfile=open(output,'w')
        for contig in target_seq:
          if contig.description==blast_record[0].alignments[0].hit_def:
            potentialsequence1=contig.seq[0:hit_1_start-1].tostring()
            potentialsequence2=contig.seq[hit_1_end+1:].tostring()
            title1='>'+target.split('.')[0]+' potential rfb region:'+blast_record[0].alignments[0].hit_def+':0 to '+str(hit_1_start-1)+'_total:'+str(len(potentialsequence1))+'bp'
            outfile.write(title1)  
            outfile.write('\n')
            outfile.write(potentialsequence1)
            outfile.write('\n')
            title2='>'+target.split('.')[0]+' potential rfb region:'+blast_record[0].alignments[0].hit_def+':'+str(hit_1_end+1)+' to contig end'+'_total:'+str(len(potentialsequence2))+'bp'
            outfile.write(title2)  
            outfile.write('\n')
            outfile.write(potentialsequence2)
            outfile.write('\n')  
          elif contig.description==blast_record[1].alignments[0].hit_def:
            potentialsequence1=contig.seq[0:hit_2_start-1].tostring()
            potentialsequence2=contig.seq[hit_2_end+1:].tostring()
            title1='>'+target.split('.')[0]+' potential rfb region:'+blast_record[1].alignments[0].hit_def+':0 to '+str(hit_2_start-1)+'_total:'+str(len(potentialsequence1))+'bp'
            outfile.write(title1)  
            outfile.write('\n')
            outfile.write(potentialsequence1)
            outfile.write('\n')
            title2='>'+target.split('.')[0]+' potential rfb region:'+blast_record[1].alignments[0].hit_def+':'+str(hit_2_end+1)+' to contig end'+'_total:'+str(len(potentialsequence2))+'bp'
            outfile.write(title2)  
            outfile.write('\n')
            outfile.write(potentialsequence2)
            outfile.write('\n')

    
        outfile.close()

        os.system('makeblastdb -in '+database+' -out '+database+'_db '+'-dbtype nucl')
        os.system('blastn -query '+output+' -db '+database+'_db '+'-out '+'Blast_Otype_'+target+'.xml '+'-outfmt 5')
        xml_file='Blast_Otype_'+target+'.xml'
        print '\n'


        filehandle=open(xml_file)
        records=NCBIXML.parse(filehandle)
        records=list(records)
        realrecord1=records[0]
        if len(records[1].alignments)>len(records[0].alignments):
          realrecord1=records[1]
        realrecord2=records[2]
        if len(records[3].alignments)>len(records[2].alignments):
          realrecord2=records[3]
        title='>'+records[0].query.split(':')[0]+'_combined_potential_sequences'
        filecontent=SeqIO.parse(output,'fasta')
        filecontent=list(filecontent)
        for contig in filecontent:
          if contig.description==realrecord1.query:
            sequence=contig.seq.tostring()

        for contig in filecontent:
          if contig.description==realrecord2.query:
            sequence=sequence+contig.seq.tostring()

        outfile=open('combined_sequence.fasta','w')
        outfile.write(title)
        outfile.write('\n')
        outfile.write(sequence)
        outfile.close()
                
        os.system('blastn -query combined_sequence.fasta'+' -db '+database+'_db '+'-out '+'Combined_seq_blast_'+target+'.xml '+'-outfmt 5')
        xml_file='Combined_seq_blast_'+target+'.xml'
        print '\n'

        filehandle=open(xml_file)
        records=NCBIXML.parse(filehandle)
        records=list(records)
        E_thresh=1e-10
                
        show_result()
    
        

    else:
      print '$$$ No O_type result, please check the number of hsps:  some alignment have more than 1 hsp (galF or gnd sequences has one more hits in tested genome), that\'s unusual for for our short sequence gnd and galF, please check your submited sequence'+'\n'
     
          
  elif len(blast_record[0].alignments)>1 and len(blast_record[1].alignments)==1:
    print 'The gnd gene is splited on different contigs of your submitted sequence' +'\n'
    for record in blast_record:
      for alignment in record.alignments:
        if len(alignment.hsps)!=1:
          print '$$$ No O_type result, please check the number of hsp:  some alignment have more than 1 hsp (galF or gnd sequences has one more hits in tested genome), that\'s unusual for our short sequence gnd and galF, please check your submited sequence'+'\n'
          break
        
    print 'Each alignment has one hsps'+'\n'
     

                
    target_seq=SeqIO.parse(target,'fasta')
    target_seq=list(target_seq)
    outfile=open(output,'w')
        
    for alignment in blast_record[0].alignments:
      for contig in target_seq:
        if contig.description==alignment.hit_def and len(alignment.hsps[0].sbjct)!=len(contig.seq):
          hitstart=alignment.hsps[0].sbjct_start
          hitend=alignment.hsps[0].sbjct_end
          if hitstart>hitend:
            buffer=hitstart
            hitstart=hitend
            hitend=buffer
          potential1=contig.seq[0:hitstart-1].tostring()
          title1='>'+target.split('.')[0]+' potential rfb region:'+alignment.hit_def+':0 to '+str(hitstart-1)+'_total:'+str(len(potential1))+'bp'
          potential2=contig.seq[hitend+1:].tostring()
          title2='>'+target.split('.')[0]+' potential rfb region:'+alignment.hit_def+':'+str(hitend+1)+' to contig end'+'_total:'+str(len(potential2))+'bp'
          aim_sequence=potential1
          title=title1
          if len(potential1)<len(potential2):
            aim_sequence=potential2
            title=title2
          outfile.write(title)
          outfile.write('\n')
          outfile.write(aim_sequence)
          outfile.write('\n')
            
            
    for alignment in blast_record[1].alignments:
      for contig in target_seq:
        if contig.description==alignment.hit_def:
          hitstart=alignment.hsps[0].sbjct_start
          hitend=alignment.hsps[0].sbjct_end
          if hitstart>hitend:
            buffer=hitstart
            hitstart=hitend
            hitend=buffer
          potential1=contig.seq[0:hitstart-1].tostring()
          title1='>'+target.split('.')[0]+' potential rfb region:'+alignment.hit_def+':0 to '+str(hitstart-1)+'_total:'+str(len(potential1))+'bp'
          potential2=contig.seq[hitend+1:].tostring()
          title2='>'+target.split('.')[0]+' potential rfb region:'+alignment.hit_def+':'+str(hitend+1)+' to contig end'+'_total:'+str(len(potential2))+'bp'
          aim_sequence=potential1
          outfile.write(title1)
          outfile.write('\n')
          outfile.write(potential1)
          outfile.write('\n')
          outfile.write(title2)
          outfile.write('\n')
          outfile.write(potential2)
          outfile.write('\n')



    outfile.close()

    os.system('makeblastdb -in '+database+' -out '+database+'_db '+'-dbtype nucl')
    os.system('blastn -query '+output+' -db '+database+'_db '+'-out '+'Blast_Otype_'+target+'.xml '+'-outfmt 5')
    print '\n'
    xml_file2='Blast_Otype_'+target+'.xml'
    filehandle=open(xml_file2)
    records=NCBIXML.parse(filehandle)
    records=list(records)
    print len(records)
    realrecord1=records[0]
    if len(records[1].alignments)>len(records[0].alignments):
      realrecord1=records[1]
    realrecord2=records[2]
    if len(records[3].alignments)>len(records[2].alignments):
      realrecord2=records[3]
    title='>'+records[0].query.split(':')[0]+'_combined_potential_sequences'
    filecontent=SeqIO.parse(output,'fasta')
    filecontent=list(filecontent)
            
    for contig in filecontent:
      if contig.description==realrecord1.query:
        sequence=contig.seq.tostring()

    for contig in filecontent:
      if contig.description==realrecord2.query:
        sequence=sequence+contig.seq.tostring()
         

    outfile=open('combined_sequence.fasta','w')
    outfile.write(title)
    outfile.write('\n')
    outfile.write(sequence)
    outfile.close()
                
    os.system('blastn -query combined_sequence.fasta'+' -db '+database+'_db '+'-out '+'Combined_seq_blast_'+target+'.xml '+'-outfmt 5')
    xml_file='Combined_seq_blast_'+target+'.xml'
    print '\n'


    filehandle=open(xml_file)
    records=NCBIXML.parse(filehandle)
    records=list(records)
    E_thresh=1e-10  
    show_result()



  elif len(blast_record[0].alignments)==1 and len(blast_record[1].alignments)>1:
    print 'The galF gene is splited on different contigs of your submitted sequence' +'\n'
    for record in blast_record:
      for alignment in record.alignments:
        if len(alignment.hsps)!=1:
          print '$$$ No O_type result, please check the number of hsps:  some alignment have more than 1 hsp (galF or gnd sequences has one more hits in tested genome), that\'s unusual for our short sequence gnd and galF, please check your submited sequence'+'\n'
          break
        
    print 'Each alignment has one hsp'+'\n'                 
    outfile=open(output,'w')

    for alignment in blast_record[0].alignments:
      for contig in target_seq:
        if contig.description==alignment.hit_def:
          hitstart=alignment.hsps[0].sbjct_start
          hitend=alignment.hsps[0].sbjct_end
          if hitstart>hitend:
            buffer=hitstart
            hitstart=hitend
            hitend=buffer
          potential1=contig.seq[0:hitstart-1].tostring()
          title1='>'+target.split('.')[0]+' potential rfb region:'+alignment.hit_def+':0 to '+str(hitstart-1)+'_total:'+str(len(potential1))+'bp'
          potential2=contig.seq[hitend+1:].tostring()
          title2='>'+target.split('.')[0]+' potential rfb region:'+alignment.hit_def+':'+str(hitend+1)+' to contig end'+'_total:'+str(len(potential2))+'bp'
          aim_sequence=potential1
          outfile.write(title1)
          outfile.write('\n')
          outfile.write(potential1)
          outfile.write('\n')
          outfile.write(title2)
          outfile.write('\n')
          outfile.write(potential2)
          outfile.write('\n')

        
    for alignment in blast_record[1].alignments:
      for contig in target_seq:
        if contig.description==alignment.hit_def and len(alignment.hsps[0].sbjct)!=len(contig.seq):
          hitstart=alignment.hsps[0].sbjct_start
          hitend=alignment.hsps[0].sbjct_end
          if hitstart>hitend:
            buffer=hitstart
            hitstart=hitend
            hitend=buffer
          potential1=contig.seq[0:hitstart-1].tostring()
          title1='>'+target.split('.')[0]+' potential rfb region:'+alignment.hit_def+':0 to '+str(hitstart-1)+'_total:'+str(len(potential1))+'bp'
          potential2=contig.seq[hitend+1:].tostring()
          title2='>'+target.split('.')[0]+' potential rfb region:'+alignment.hit_def+':'+str(hitend+1)+' to contig end'+'_total:'+str(len(potential2))+'bp'
          aim_sequence=potential1
          title=title1
          if len(potential1)<len(potential2):
            aim_sequence=potential2
            title=title2
          outfile.write(title)
          outfile.write('\n')
          outfile.write(aim_sequence)
          outfile.write('\n')

         
    outfile.close()

    os.system('makeblastdb -in '+database+' -out '+database+'_db '+'-dbtype nucl')
    os.system('blastn -query '+output+' -db '+database+'_db '+'-out '+'Blast_Otype_'+target+'.xml '+'-outfmt 5')
    print '\n'
    xml_file='Blast_Otype_'+target+'.xml'
    filehandle=open(xml_file)
    records=NCBIXML.parse(filehandle)
    records=list(records)
    realrecord1=records[0]
    if len(records[1].alignments)>len(records[0].alignments):
      realrecord1=records[1]
    realrecord2=records[2]
    if len(records[3].alignments)>len(records[2].alignments):
      realrecord2=records[3]
    title='>'+records[0].query.split(':')[0]+'_combined_potential_sequences'
    filecontent=SeqIO.parse(output,'fasta')
    filecontent=list(filecontent)
            
    for contig in filecontent:
      if contig.description==realrecord1.query:
        sequence=contig.seq.tostring()

    for contig in filecontent:
      if contig.description==realrecord2.query:
        sequence=sequence+contig.seq.tostring()
         

    outfile=open('combined_sequence.fasta','w')
    outfile.write(title)
    outfile.write('\n')
    outfile.write(sequence)
    outfile.close()
                
    os.system('blastn -query combined_sequence.fasta'+' -db '+database+'_db '+'-out '+'Combined_seq_blast_'+target+'.xml '+'-outfmt 5')
    xml_file='Combined_seq_blast_'+target+'.xml'
    print '\n'


    filehandle=open(xml_file)
    records=NCBIXML.parse(filehandle)
    records=list(records)
    E_thresh=1e-10
    show_result()
     

  elif len(blast_record[0].alignments)>1 and len(blast_record[1].alignments)>1:
    print 'The gnd and galF gene are both splited on different contigs of your submitted sequence' +'\n'
    for record in blast_record:
      for alignment in record.alignments:
        if len(alignment.hsps)!=1:
          print '$$$ No O_type result, please check the number of hsp:  some alignment have more than 1 hsp (galF or gnd sequences has one more hits in tested genome), that\'s unusual for our short sequence gnd and galF, please check your submited sequence'+'\n'
          break
        
    print 'Each alignment has one hsps'+'\n'
    outfile=open(output,'w')

    for alignment in blast_record[0].alignments:
      for contig in target_seq:
        if contig.description==alignment.hit_def and len(alignment.hsps[0].sbjct)!=len(contig.seq):
          hitstart=alignment.hsps[0].sbjct_start
          hitend=alignment.hsps[0].sbjct_end
          if hitstart>hitend:
            buffer=hitstart
            hitstart=hitend
            hitend=buffer
          potential1=contig.seq[0:hitstart-1].tostring()
          title1='>'+target.split('.')[0]+' potential rfb region:'+alignment.hit_def+':0 to '+str(hitstart-1)+'_total:'+str(len(potential1))+'bp'
          potential2=contig.seq[hitend+1:].tostring()
          title2='>'+target.split('.')[0]+' potential rfb region:'+alignment.hit_def+':'+str(hitend+1)+' to contig end'+'_total:'+str(len(potential2))+'bp'
          aim_sequence=potential1
          title=title1
          if len(potential1)<len(potential2):
            aim_sequence=potential2
            title=title2
          outfile.write(title)
          outfile.write('\n')
          outfile.write(aim_sequence)
          outfile.write('\n')

        
    for alignment in blast_record[1].alignments:
      for contig in target_seq:
        if contig.description==alignment.hit_def and len(alignment.hsps[0].sbjct)!=len(contig.seq):
          hitstart=alignment.hsps[0].sbjct_start
          hitend=alignment.hsps[0].sbjct_end
          if hitstart>hitend:
            buffer=hitstart
            hitstart=hitend
            hitend=buffer
          potential1=contig.seq[0:hitstart-1].tostring()
          title1='>'+target.split('.')[0]+' potential rfb region:'+alignment.hit_def+':0 to '+str(hitstart-1)+'_total:'+str(len(potential1))+'bp'
          potential2=contig.seq[hitend+1:].tostring()
          title2='>'+target.split('.')[0]+' potential rfb region:'+alignment.hit_def+':'+str(hitend+1)+' to contig end'+'_total:'+str(len(potential2))+'bp'
          aim_sequence=potential1
          title=title1
          if len(potential1)<len(potential2):
            aim_sequence=potential2
            title=title2
          outfile.write(title)
          outfile.write('\n')
          outfile.write(aim_sequence)
          outfile.write('\n')

         
    outfile.close()

    os.system('makeblastdb -in '+database+' -out '+database+'_db '+'-dbtype nucl')
    os.system('blastn -query '+output+' -db '+database+'_db '+'-out '+'Blast_Otype_'+target+'.xml '+'-outfmt 5')
    xml_file='Blast_Otype_'+target+'.xml'
    print '\n'
    filehandle=open(xml_file)
    records=NCBIXML.parse(filehandle)
    records=list(records)
    realrecord1=records[0]
    if len(records[1].alignments)>len(records[0].alignments):
      realrecord1=records[1]
    realrecord2=records[2]
    if len(records[3].alignments)>len(records[2].alignments):
      realrecord2=records[3]
    title='>'+records[0].query.split(':')[0]+'_combined_potential_sequences'
    filecontent=SeqIO.parse(output,'fasta')
    filecontent=list(filecontent)
            
    for contig in filecontent:
      if contig.description==realrecord1.query:
        sequence=contig.seq.tostring()

    for contig in filecontent:
      if contig.description==realrecord2.query:
        sequence=sequence+contig.seq.tostring()
         

    outfile=open('combined_sequence.fasta','w')
    outfile.write(title)
    outfile.write('\n')
    outfile.write(sequence)
    outfile.close()
                
    os.system('blastn -query combined_sequence.fasta'+' -db '+database+'_db '+'-out '+'Combined_seq_blast_'+target+'.xml '+'-outfmt 5')
    xml_file='Combined_seq_blast_'+target+'.xml'
    print '\n'


    filehandle=open(xml_file)
    records=NCBIXML.parse(filehandle)
    records=list(records)
    E_thresh=1e-10
    show_result()



else:
  print '$$$ $$$ No O_type result, Attention: unusual number of hits, no hits for galF or gnd! Check blast output...'+'\n'


os.system('rm '+target+'_db.'+'*')