Skip to content
Snippets Groups Projects
Commit 5ab332fc authored by Thomas Wacquet's avatar Thomas Wacquet
Browse files

init

parent 53410a69
No related branches found
No related tags found
No related merge requests found
import re
class IOAlignment:
    """Parser for FASTA files.

    The original description also mentions configuration files, but only
    FASTA parsing is implemented in this class.
    """

    # A sequence line may only contain the four DNA bases.
    _DNA_LINE = re.compile(r"[ACGT]*")

    def __init__(self, Fi):
        """Remember the path of the file to parse.

        Parameters
        ----------
        Fi : str
            Path of the FASTA file.
        """
        self.__fi = Fi

    def ParseFasta(self):
        """Parse the FASTA file and return its sequences indexed by label.

        Returns
        -------
        res : dict
            Maps each label (the header line without its leading '>') to
            the concatenation of the sequence lines that follow it.

        Raises
        ------
        TypeError
            If sequence data appears before any label line, or if a
            sequence line contains a character other than A, C, G or T.
        """
        res = dict()
        current_label = ""
        # The context manager guarantees the file is closed, even on error.
        with open(self.__fi, "r") as fasta:
            for line in fasta:
                line = line.rstrip()
                if not line:
                    # Blank lines carry no data; skipping them also avoids
                    # indexing into an empty string.
                    continue
                if line.startswith(">"):
                    # Header line: start a new record under this label.
                    current_label = line[1:]
                    res[current_label] = ""
                else:
                    # Sequence line: append it to the current record.
                    if current_label == "":
                        raise TypeError(
                            "sequence data found before any '>' label line"
                        )
                    # Validate the actual line (not a constant) against
                    # the DNA alphabet, over its whole length.
                    if not self._DNA_LINE.fullmatch(line):
                        raise TypeError(
                            "invalid character in DNA sequence line: %r" % line
                        )
                    res[current_label] += line
        return res
# MABS-2 # Projet-MABS-2
...@@ -15,14 +15,14 @@ Already a pro? Just edit this README.md and make it your own. Want to make it ea ...@@ -15,14 +15,14 @@ Already a pro? Just edit this README.md and make it your own. Want to make it ea
``` ```
cd existing_repo cd existing_repo
git remote add origin https://gitlab.univ-lille.fr/thomas.wacquet.etu/mabs-2.git git remote add origin https://gitlab.univ-lille.fr/thomas.wacquet.etu/projet-mabs-2.git
git branch -M main git branch -M main
git push -uf origin main git push -uf origin main
``` ```
## Integrate with your tools ## Integrate with your tools
- [ ] [Set up project integrations](https://gitlab.univ-lille.fr/thomas.wacquet.etu/mabs-2/-/settings/integrations) - [ ] [Set up project integrations](https://gitlab.univ-lille.fr/thomas.wacquet.etu/projet-mabs-2/-/settings/integrations)
## Collaborate with your team ## Collaborate with your team
......
>lcl|NC_007357.1_cds_YP_308664.1_1 [gene=PB2] [locus_tag=FLUAVH5N1_s1gp1] [db_xref=GeneID:3654615] [protein=polymerase] [protein_id=YP_308664.1] [location=28..2307] [gbkey=CDS]
ATGGAAAGAATAAAAGAACTAAGAGATCTAATGTCGCAGTCCCGCACTCGCGAGATACTAACAAAAACCACTGTGGATCA
TATGGCCATAATCAAGAAATACACATCAGGAAGACAAGAGAAGAACCCTGCTCTCAGAATGAAATGGATGATGGCAATGA
AATATCCAATCACAGCAGACAAGAGAATAATGGAGATGATTCCTGAAAGGAATGAGCAAGGACAAACGCTTTGGAGCAAG
ACAAATGATGCTGGGTCGGACAGAGTGATGGTGTCTCCCCTAGCTGTAACTTGGTGGAACAGGAATGGGCCGACAACAAG
TACAGTCCATTATCCAAAGGTTTACAAAACATACTTTGAGAAGGTTGAAAGGTTAAAACATGGAACCTTCGGTCCCGTTC
ATTTCCGAAACCAAGTTAAAATACGTCGCCGGGTGGATATAAACCCGGGCCATGCAGATCTCAGTGCTAAAGAAGCACAA
GATGTTATCATGGAGGTCGTTTTCCCAAATGAAGTGGGAGCTAGAATATTGACATCAGAGTCGCAATTGACAATAACAAA
AGAGAAGAAAGAAGAGCTCCAGGATTGTAAAATTGCTCCTTTAATGGTGGCATACATGTTGGAAAGAGAACTGGTCCGCA
AAACCAGATTTCTACCGGTAGCAGGCGGAACAAGCAGTGTGTACATTGAGGTATTGCATTTGACTCAAGGGACCTGTTGG
GAACAGATGTACACTCCCGGCGGAGAAGTAAGAAATGATGATGTTGACCAGAGTTTGATCATCGCTGCCAGAAACATTGT
TAGGAGAGCAACAGTATCAGCGGACCCACTGGCATCACTCTTGGAGATGTGTCACAGCACACAAATTGGGGGAATAAGGA
TGGTGGACATCCTTAGGCAAAACCCAACTGAGGAGCAAGCTGTGGATATATGCAAAGCAGCAATGGGTTTGAGGATCAGT
TCATCCTTTAGCTTTGGAGGCTTCACTTTCAAAAGAACAAATGGATCATCCGTCAAGAAGGAAGAGGAAGTGCTTACAGG
CAACCTCCAAACATTGAAAATAAAAGTACATGAGGGGTATGAAGAATTCACAATGGTTGGGCGGAGAGCAACAGCTATCC
TGAGGAAAGCAACTAGAAGGCTGATTCAGTTGATAGTAAGTGGAAGAGATGAACAATCAATCGCTGAAGCGATCATTGTA
GCAATGGTGTTCTCACAGGAGGATTGCATGATAAAGGCAGTCCGAGGCGATCTGAATTTCGTGAACAGAGCAAACCAAAG
ATTGAACCCCATGCATCAACTCCTGAGGCACTTCCAAAAAGATGCAAAAGTGCTGTTTCAGAACTGGGGAATTGAACCTA
TTGACAATGTCATGGGGATGATCGGAATATTACCTGACATGACTCCAAGCGCAGAGATGTCACTGAGAGGAGTGAGAGTT
AGTAAGATGGGAGTAGATGAATATTCCAGCACGGAGAGAGTGGTGGTGAGTATTGACCGTTTCTTGAGGGTCCGAGATCA
GCAGGGGAACGTACTCTTATCTCCTGAAGAGGTTAGTGAAACACAGGGAACAGAGAAGTTGACAATAACATATTCATCCT
CAATGATGTGGGAAATCAACGGTCCTGAGTCAGTGCTTGTTAACACTTATCAATGGATCATCAGGAATTGGGAGACTGTA
AAGATTCAATGGTCTCAAGATCCCACAATGCTGTACAATAAGATGGAGTTTGAATCGTTCCAATCCTTGGTGCCAAAGGC
TGCCAGAAGCCAATATAGTGGATTTGTGAGAACACTATTCCAACAGATGCGTGATGTTTTGGGGACATTTGATACTGTCC
AAATAATCAAGCTGCTACCATTTGCAGCAGCCCCACCGGAGCCGAGCAGAATGCAGTTTTCTTCTCTAACTGTGAATGTG
AGAGGCTCAGGAATGAGAATACTCGTGAGGGGTAACTCCCCCGTGTTCAACTACAACAAGGCAACCAAAAGGCTTACAGT
CCTCGGAAAGGACGCAGGTGCATTAACAGAAGATCCAGACGAGGGAACAGCCGGGGTGGAATCTGCAGTATTGAGGGGAT
TCCTAATTCTAGGCAGAGAGGACAAAAGATATGGACCCGCATTGAGCATCAATGAACTGAGCAATCTTGCAAAAGGGGAG
AAGGCTAATGTATTGATAATGCAAGGAGACGTGGTGTTGGTAATGAAACGGAAACGGGACTTTAGCATACTTACTGACAG
CCAGACAGCGACCAAAAGAATTCGGATGGCCATCAATTAG
>lcl|NC_007358.1_cds_YP_308665.1_2 [gene=PB1] [locus_tag=FLUAVH5N1_s2gp1] [db_xref=GeneID:3654616] [protein=polymerase] [protein_id=YP_308665.1] [location=25..2298] [gbkey=CDS]
ATGGATGTCAATCCGACTTTACTTTTCTTAAAAGTGCCAGCGCAAAATGCTATAAGTACCACATTCCCTTATACTGGAGA
TCCTCCATACAGCCATGGAACAGGAACAGGATACACCATGGACACAGTCAACAGAACACATCAATATTCAGAAAAGGGGA
AATGGACAACGAACACAGAGACTGGAGCACCCCAACTCAATCCGATTGATGGACCACTACCTGAGGATAATGAGCCGAGT
GGGTATGCACAAACAGATTGTGTATTGGAAGCAATGGCTTTCCTTGAAGAATCCCACCCAGGGATCTTTGAAAACTCGTG
TCTTGAAACGATGGAAGTTGTTCAGCAAACAAGAGTGGATAAGCTGACCCAAGGTCGCCAAACCTATGACTGGACATTGA
AAAGAAACCAGCCGGCTGCAACCGCTTTGGCCAACACTATAGAGGTCTTCAGATCGAATGGTCTAACAGCCAATGAATCG
GGAAGGCTAATAGATTTCCTCAAAGACGTGATGGAATCAATGGATAAGGGAGAAATGGAAATAATAACACATTTCCAGAG
AAAGAGAAGAGTGAGGGACAACATGACCAAGAAAATGGTCACACAAAGAACAATAGGGAAGAAAAAACAAAGGCTGAACA
AAAGGAGCTACCTAATAAGAGCACTGACACTGAACACAATGACAAAAGACGCAGAAAGAGGCAAATTGAAGAGGCGGGCA
ATTGCAACACCCGGGATGCAAATCAGAGGATTCGTGTACTTTGTCGAAACACTAGCGAGGAGTATCTGTGAGAAACTTGA
GCAATCTGGACTCCCCGTCGGAGGGAATGAAAAGAAGGCTAAATTGGCAAATGTCGTGAGGAAGATGATGACTAACTCAC
AAGATACAGAGCTCTCTTTTACAATTACTGGAGACAACACCAAATGGAATGAGAATCAGAACCCTCGGATGTTTCTAGCA
ATGATAACATACATCACAAGGAACCAACCTGAATGGTTTAGAAATGTCTTAAGCATTGCTCCTATAATGTTCTCAAACAA
GATGGCAAGATTAGGGAAAGGATACATGTTCGAAAGTAAGAGCATGAAGCTACGGACACAAATACCAGCAGAAATGCTTG
CAAGCATTGACTTGAAATACTTCAACGAATCAACGAGAAAGAAAATCGAGAAAATAAGACCTCTACTAATAGATGGCACA
GCCTCATTGAGTCCTGGAATGATGATGGGCATGTTCAATATGCTGAGTACAGTCTTAGGAGTTTCAATCCTGAATCTTGG
GCAGAAGAGGTACACCAAAACCACATACTGGTGGGACGGACTCCAATCCTCTGATGATTTCGCTCTCATAGTGAATGCAC
CAAATCATGAGGGAATAGAAGCAGGGGTGGATAGGTTCTATAGGACTTGCAAACTAGTTGGAATCAATATGACCAAGAAG
AAGTCTTACATAAATCGGACAGGAACATGTGAATTCACAAGCTTCTTCTACCGCTATGGGTTCGTAGCCAACTTCAGTAT
GGAGCTGCCCAGCTTTGGAGTGTCTGGGATTAATGAATCGGCTGACATGAGCATTGGTGTTACAGTGATAAAGAACAATA
TGATGGACAACGACCTTGGACCAGCAACAGCTCAGATGGCTCTTCAGCTATTCATTAAGGACTACAGATACCCATACCGA
TGCCACAGGGGGGATACACAAATCCAAACGAGGAGATCATTCGAGCTGAAGAAGCTGTGGGAGCAGACCCGCTCAAAGGC
AGGACTGTTGGTTTCAGATGGAGGACCAAACCCATACAATATCCGGAATCTCCACATTCCGGAGGCTGGCTTGAAGTGGG
AATTGATGGATGAAGACTACCAGGGCAGACTGTGTAATCCTCTGAACCCGTTTGTTAGTCATAAGGAAATTGAGTCTGTC
AACAATGCTGTGGTAATGCCAGCTCATGGCCCAGCCAAGAGCATGGAATATGATGCAGTTGCGACTACACATTCATGGAT
TCCCAAGAGGAATCGTTCCATTCTCAACACCAGCCAAAGGGGGATTCTTGAGGATGAACAGATGTATCAGAAGTGCTGCA
ATCTATTCGAGAAATTCTTCCCTAGCAGTTCATATCGGAGGCCAGTTGGAATTTCCAGCATGGTGGAGGCCATGGTGTCT
AGGGCCCGAATTGATGCACGAATTGACTTCGAGTCTGGAAGGATTAAGAAAGAAGAGTTTGCTGAGATCATGAAGATCTG
TTCCACCATTGAAGAGCTCGGACGGCAAAAATAG
>lcl|NC_007358.1_cds_YP_473348.1_3 [gene=PB1-F2] [locus_tag=FLUAVH5N1_s2gp2] [db_xref=GeneID:3896424] [protein=PB1-F2 protein] [protein_id=YP_473348.1] [location=119..391] [gbkey=CDS]
ATGGAACAGGAACAGGATACACCATGGACACAGTCAACAGAACACATCAATATTCAGAAAAGGGGAAATGGACAACGAAC
ACAGAGACTGGAGCACCCCAACTCAATCCGATTGATGGACCACTACCTGAGGATAATGAGCCGAGTGGGTATGCACAAAC
AGATTGTGTATTGGAAGCAATGGCTTTCCTTGAAGAATCCCACCCAGGGATCTTTGAAAACTCGTGTCTTGAAACGATGG
AAGTTGTTCAGCAAACAAGAGTGGATAAGCTGA
>lcl|NC_007359.1_cds_YP_308666.1_4 [gene=PA] [locus_tag=FLUAVH5N1_s3gp1] [db_xref=GeneID:3654617] [protein=polymerase] [protein_id=YP_308666.1] [location=25..2175] [gbkey=CDS]
ATGGAAGACTTTGTGCGACAATGCTTCAATCCAATGATTGTCGAGCTTGCGGAAAAGGCAATGAAAGAATATGGGGAAGA
TCCGAAAATCGAAACGAACAAATTTGCCGCAATATGCACGCACTTAGAAGTCTGTTTCATGTATTCAGATTTCCACTTTA
TTGATGAACGGGGCGAATCAACAATTATAGAATCTGGCGATCCCAATGCATTATTGAAACACCGGTTTGAAATAATCGAA
GGGAGGGACCGAACAATGGCCTGGACAGTGGTGAATAGTATCTGCAACACCACAGGAGTTGAGAAGCCTAAATTTCTCCC
AGATTTGTATGACTACAAGGAGAACCGATTTATTGAAATTGGAGTGACACGGAGGGAAGTTCACACATACTATCTAGAAA
AAGCCAACAAGATAAAATCTGAGAAGACACACATTCACATATTCTCATTCACTGGAGAGGAAATGGCCACCAAAGCGGAC
TACACCCTTGATGAAGAAAGCAGGGCCCGAATCAAAACCAGGCTGTTCACTATAAGGCAGGAAATGGCCAGTAGGGGTTT
ATGGGATTCCTTTCGTCAGTCCGAGAGAGGCGAAGAGACAGTTGAAGAAAGATTTGAAATCACAGGGACTATGTGCAGGC
TTGCCGACCAAAGTCTCCCACCTAATTTCTCCAGCCTTGAAAAATTTAGAGCCTATGTGGATGGATTCGAACCGAACGGC
TGCATTGAGGGCAAGCTTTCTCAAATGTCGAAAGAAGTAAACGCCAGAATTGAGCCATTTCTGAAGACAACACCACGCCC
TCTTAGATTACCTGATGGGCCTCCCTGCTCTCAGCGGTCGAAGTTTTTGCTGATGGATGCCCTTAAATTAAGCATCGAAG
ACCCGAGTCATGAGGGGGAGGGGATACCGCTATATGATGCAATCAAATGCATGAAAACATTTTTCGGCTGGAAAGAGCCC
AACATTGTAAAACCACATGAAAAAGGCATAAACCCCAATTACCTCCTGGCTTGGAAGCAGGTGCTGGCAGAGCTCCAAGA
TATTGAAAACGAGGAGAAAATTCCAAAGACAAAGAACATGAGGAAAACAAGCCAATTGAAGTGGGCACTTGGTGAGAATA
TGGCACCAGAGAAAGTAGACTTTGAGGATTGCAAAGATGTTAGCGATCTAAGGCAGTATGACAGTGATGAACCAAAGCCT
AGATCACTAGCAAGCTGGATCCAGAGTGAATTCAACAAGGCATGCGAATTGACAGATTCAAGTTGGATTGAACTTGATGA
AATAGGGGAAGACGTTGCTCCAATTGAGCACATTGCAAGTATGAGAAGGAACTATTTCACAGCGGAAGTATCCCATTGCA
GGGCTACTGAATACATAATGAAGGGAGTGTACATAAACACAGCTTTGTTGAATGCATCCTGTGCAGCCATGGATGACTTC
CAACTGATCCCAATGATAAGCAAATGCAGAACCAAAGAAGGAAGACGGAAAACTAACCTGTATGGATTCCTTATAAAAGG
AAGATCCCATTTGAGAAATGACACCGATGTGGTAAACTTTGTGAGTATGGAATTCTCTCTTACTGATCCGAGGCTGGAGC
CACACAGATGGGAAAAGTACTGCGTTCTTCGGATAGGAGACATGCTCTTACGGACTGAAATAGGCCAAGTGTCAAGGCCC
ATGTTTCTTTATGTGAGAACCAATGGAACCTCCAAGATCAAGATGAAATGGGGCATGGAAATGAGGCGATGCCCTTTTCA
ATCCCTTCAACAGATTGAGAGCATGATTGAGGCCGAGTCTTCTGTCAAAGAAAAAGACATGACTAAAGAATTCTTTGAAA
ACAAATCAGAAACATGGCCAATTGGAGAATCACCCAAGGGAGTGGAGGAAGGCTCCATCGGGAAGGTGTGCAGAACCTTA
CTGGCTAAATCTGTTTTCAACAGTCTATATGCATCTCCACAACTCGAGGGGTTTTCAGCTGAATCAAGAAAATTGCTTCT
CATTGTTCAGGCACTTAGGGACAACCTGGAACCTGGAACCTTCGATCTTGGGGGGCTATATGAAGCAATTGAGGAGTGCC
TGATTAATGATCCCTGGGTTTTGCTTAATGCATCTTGGTTCAACTCCTTCCTCACACATGCACTAAGATAG
>lcl|NC_007359.1_cds_YP_006495800.1_5 [gene=PA-X] [locus_tag=FLUAVH5N1_s3gp2] [db_xref=GeneID:13229455] [protein=PA-X protein] [exception=ribosomal slippage] [protein_id=YP_006495800.1] [location=join(25..594,596..784)] [gbkey=CDS]
ATGGAAGACTTTGTGCGACAATGCTTCAATCCAATGATTGTCGAGCTTGCGGAAAAGGCAATGAAAGAATATGGGGAAGA
TCCGAAAATCGAAACGAACAAATTTGCCGCAATATGCACGCACTTAGAAGTCTGTTTCATGTATTCAGATTTCCACTTTA
TTGATGAACGGGGCGAATCAACAATTATAGAATCTGGCGATCCCAATGCATTATTGAAACACCGGTTTGAAATAATCGAA
GGGAGGGACCGAACAATGGCCTGGACAGTGGTGAATAGTATCTGCAACACCACAGGAGTTGAGAAGCCTAAATTTCTCCC
AGATTTGTATGACTACAAGGAGAACCGATTTATTGAAATTGGAGTGACACGGAGGGAAGTTCACACATACTATCTAGAAA
AAGCCAACAAGATAAAATCTGAGAAGACACACATTCACATATTCTCATTCACTGGAGAGGAAATGGCCACCAAAGCGGAC
TACACCCTTGATGAAGAAAGCAGGGCCCGAATCAAAACCAGGCTGTTCACTATAAGGCAGGAAATGGCCAGTAGGGGTTT
ATGGGATTCCTTCGTCAGTCCGAGAGAGGCGAAGAGACAGTTGAAGAAAGATTTGAAATCACAGGGACTATGTGCAGGCT
TGCCGACCAAAGTCTCCCACCTAATTTCTCCAGCCTTGAAAAATTTAGAGCCTATGTGGATGGATTCGAACCGAACGGCT
GCATTGAGGGCAAGCTTTCTCAAATGTCGAAAGAAGTAA
>lcl|NC_007362.1_cds_YP_308669.1_6 [gene=HA] [locus_tag=FLUAVH5N1_s4gp1] [db_xref=GeneID:3654620] [protein=hemagglutinin] [protein_id=YP_308669.1] [location=22..1728] [gbkey=CDS]
ATGGAGAAAATAGTGCTTCTTCTTGCAATAGTCAGTCTTGTCAAAAGTGATCAGATTTGCATTGGTTACCATGCAAACAA
CTCGACAGAGCAGGTTGACACAATAATGGAAAAGAACGTTACTGTTACACATGCCCAAGACATACTGGAAAAGACACACA
ATGGGAAGCTCTGCGATCTAAATGGAGTGAAGCCTCTCATTTTGAGAGATTGTAGTGTAGCTGGATGGCTCCTCGGAAAC
CCTATGTGTGACGAATTCATCAATGTGCCGGAATGGTCTTACATAGTGGAGAAGGCCAGTCCAGCCAATGACCTCTGTTA
CCCAGGGGATTTCAACGACTATGAAGAACTGAAACACCTATTGAGCAGAACAAACCATTTTGAGAAAATTCAGATCATCC
CCAAAAGTTCTTGGTCCAATCATGATGCCTCATCAGGGGTGAGCTCAGCATGTCCATACCATGGGAGGTCCTCCTTTTTC
AGAAATGTGGTATGGCTTATCAAAAAGAACAGTGCATACCCAACAATAAAGAGGAGCTACAATAATACCAACCAAGAAGA
TCTTTTAGTACTGTGGGGGATTCACCATCCTAATGATGCGGCAGAGCAGACAAAGCTCTATCAAAACCCAACCACTTACA
TTTCCGTTGGAACATCAACACTGAACCAGAGATTGGTTCCAGAAATAGCTACTAGACCCAAAGTAAACGGGCAAAGTGGA
AGAATGGAGTTCTTCTGGACAATTTTAAAGCCGAATGATGCCATCAATTTCGAGAGTAATGGAAATTTCATTGCTCCAGA
ATATGCATACAAAATTGTCAAGAAAGGGGACTCAGCAATTATGAAAAGTGAATTGGAATATGGTAACTGCAACACCAAGT
GTCAAACTCCAATGGGGGCGATAAACTCTAGTATGCCATTCCACAACATACACCCCCTCACCATCGGGGAATGCCCCAAA
TATGTGAAATCAAACAGATTAGTCCTTGCGACTGGACTCAGAAATACCCCTCAGAGAGAGAGAAGAAGAAAAAAGAGAGG
ACTATTTGGAGCTATAGCAGGTTTTATAGAGGGAGGATGGCAGGGAATGGTAGATGGTTGGTATGGGTACCACCATAGCA
ATGAGCAGGGGAGTGGATACGCTGCAGACAAAGAATCCACTCAAAAGGCAATAGATGGAGTCACCAATAAGGTCAACTCG
ATCATTGACAAAATGAACACTCAGTTTGAGGCCGTTGGAAGGGAATTTAATAACTTGGAAAGGAGGATAGAGAATTTAAA
CAAGCAGATGGAAGACGGATTCCTAGATGTCTGGACTTATAATGCTGAACTTCTGGTTCTCATGGAAAATGAGAGAACTC
TAGACTTTCATGACTCAAATGTCAAGAACCTTTATGACAAGGTCCGACTACAGCTTAGGGATAATGCAAAGGAGCTGGGT
AATGGTTGTTTCGAGTTCTATCACAAATGTGATAATGAATGTATGGAAAGTGTAAAAAACGGAACGTATGACTACCCGCA
GTATTCAGAAGAAGCAAGACTAAACAGAGAGGAAATAAGTGGAGTAAAATTGGAATCAATGGGAACTTACCAAATACTGT
CAATTTATTCAACAGTGGCGAGTTCCCTAGCACTGGCAATCATGGTAGCTGGTCTATCTTTATGGATGTGCTCCAATGGA
TCGTTACAATGCAGAATTTGCATTTAA
>lcl|NC_007360.1_cds_YP_308667.1_7 [gene=NP] [locus_tag=FLUAVH5N1_s5gp1] [db_xref=GeneID:3654618] [protein=nucleocapsid protein] [protein_id=YP_308667.1] [location=46..1542] [gbkey=CDS]
ATGGCGTCTCAGGGCACCAAACGATCTTATGAACAGATGGAAACTGGTGGAGAACGCCAGAATGCTACTGAGATCAGAGC
ATCTGTTGGAAGAATGGTTGGTGGAATTGGGAGGTTTTATATACAGATGTGCACTGAACTCAAACTCAGCGACTATGAAG
GAAGGCTGATTCAGAACAGCATAACAATAGAGAGAATGGTTCTCTCTGCATTTGATGAAAGGAGGAACAAATACCTGGAA
GAACATCCCAGTGCGGGGAAGGACCCAAAGAAAACTGGAGGTCCAATCTACCGAAGAAGAGACGGAAAATGGGTGAGAGA
GCTGATTCTGTATGACAAAGAGGAGATCAGGAGAATTTGGCGTCAAGCGAACAATGGAGAAGATGCAACTGCTGGTCTCA
CTCACATGATGATCTGGCATTCCAATCTAAATGATGCCACATACCAGAGAACAAGAGCTCTCGTGCGTACTGGGATGGAC
CCTAGAATGTGCTCTCTGATGCAAGGATCAACTCTCCCGAGGAGATCTGGAGCTGCTGGTGCGGCAGTAAAGGGAGTCGG
AACGATGGTGATGGAACTAATTCGGATGATAAAGCGAGGGATTAACGATCGGAATTTCTGGAGAGGTGAAAATGGGCGAA
GAACAAGAATTGCATATGAGAGAATGTGCAACATCCTCAAAGGGAAATTCCAAACAGCAGCACAAAGAGCAATGATGGAT
CAGGTACGGGAAAGCAGAAATCCTGGGAATGCTGAGATTGAAGATCTCATATTTCTGGCACGGTCTGCACTCATCCTGAG
AGGATCAGTGGCCCACAAGTCCTGCTTGCCTGCTTGTGTGTACGGGCTTGCCGTGGCCAGTGGATATGACTTTGAGAGAG
AAGGGTACTCTCTGGTCGGGATTGATCCTTTCCGTCTGCTGCAAAACAGCCAGGTCTTTAGTCTAATTAGACCAAATGAG
AATCCAGCACATAAAAGTCAATTGGTGTGGATGGCATGCCATTCTGCAGCATTTGAAGATCTGAGAGTCTCAAGCTTCAT
CAGAGGGACAAGAGTGGCCCCAAGGGGACAACTATCTACTAGAGGAGTTCAAATTGCTTCAAATGAGAACATGGAAACAA
TGGACTCCAGCACTCTTGAACTGAGAAGCAGATATTGGGCTATAAGGACCAGGAGTGGAGGAAACACCAACCAGCAGAGA
GCATCTGCAGGACAAATCAGTGTGCAGCCTACTTTCTCGGTACAGAGAAATCTTCCCTTCGAAAGAGCGACCATTATGGC
GGCATTCACAGGGAATACAGAGGGCAGAACATCTGACATGAGGACTGAAATCATAAGGATGATGGAAAGCTCCAGACCAG
AAGATGTGTCTTTCCAGGGGCGGGGAGTCTTCGAGCTCTCGGACGAAAAGGCAACGAACCCGATCGTGCCTTCCTTTGAC
ATGAGTAATGAAGGATCTTATTTCTTCGGAGACAATGCAGAGGAATATGACAATTGA
>lcl|NC_007361.1_cds_YP_308668.1_8 [gene=NA] [locus_tag=FLUAVH5N1_s6gp1] [db_xref=GeneID:3654619] [protein=neuraminidase] [protein_id=YP_308668.1] [location=21..1430] [gbkey=CDS]
ATGAATCCAAATCAGAAGATAATAACCATTGGATCAATCTGTATGGTAGTTGGGATAATTAGCTTGATGTTACAAATTGG
GAACATAATCTCAATATGGGTCAGTCATTCAATTCAGACAGGGAATCAACACCAAGCTGAACCATGCAATCAAAGCATTA
TTACTTATGAAAACAACACCTGGGTAAATCAAACATATGTCAACATCAGCAATACCAATTTTCTTACTGAAAAAGCTGTG
GCTTCAGTAACATTAGCGGGCAATTCATCTCTTTGCCCCATTAGCGGATGGGCTGTACACAGTAAGGACAACGGTATAAG
AATCGGTTCCAAGGGGGATGTGTTTGTTATAAGAGAGCCGTTCATCTCATGCTCCCACTTGGAATGCAGAACTTTCTTTT
TGACTCAGGGAGCCTTGCTGAATGACAAGCACTCCAATGGGACCGTCAAAGACAGAAGCCCTCACAGAACATTGATGAGT
TGTCCTGTGGGTGAGGCTCCCTCCCCATATAACTCAAGGTTTGAGTCTGTTGCTTGGTCGGCAAGTGCTTGCCATGATGG
CACCAGTTGGTTGACAATTGGAATTTCTGGCCCAGACAATGGGGCTGTGGCTGTATTGAAATACAACGGCATAATAACAG
ACACTATCAAGAGTTGGAGGAACAACATACTGAGAACTCAAGAGTCTGAATGTGCATGTGTAAATGGCTCTTGCTTTACT
GTAATGACTGACGGACCAAGTAATGGGCAGGCCTCATATAAGATCTTCAAAATGGAAAAAGGGAAAGTAGTTAAATCAGT
CGAATTGAATGCCCCTAATTATCACTATGAGGAGTGCTCCTGTTATCCTGATGCTGGCGAAATCACATGTGTGTGCAGGG
ATAATTGGCATGGCTCAAATCGGCCATGGGTATCTTTCAATCAAAATTTGGAGTATCAAATAGGATATATATGCAGTGGA
GTTTTCGGAGACAATCCACGCCCCAATGATGGAACAGGCAGTTGTGGTCCGGTGTCCCCTAACGGGGCATATGGAGTAAA
AGGGTTTTCATTTAAATACGGCAATGGTGTTTGGATCGGGAGAACCAAAAGCACTAATTCCAGGAGCGGCTTTGAAATGA
TTTGGGATCCAAATGGGTGGACTGGAACGGACAGTAGCTTCTCGGTGAAACAAGATATCGTAGCAATAACTGATTGGTCA
GGATATAGCGGGAGTTTTGTCCAGCATCCAGAACTGACAGGATTAGATTGCATAAGACCTTGTTTCTGGGTTGAGCTAAT
CAGAGGGCGGCCCAAAGAGAGCACAATTTGGACTAGTGGGAGCAGCATATCTTTTTGTGGTGTAAATAGTGACACTGTGG
GTTGGTCTTGGCCAGACGATGCCGAGTTGCCATTCACCATTGACAAGTAG
>lcl|NC_007363.1_cds_YP_308670.1_9 [gene=M2] [locus_tag=FLUAVH5N1_s7gp1] [db_xref=GeneID:3654621] [protein=matrix protein 2] [protein_id=YP_308670.1] [location=join(26..51,740..1007)] [gbkey=CDS]
ATGAGTCTTCTAACCGAGGTCGAAACGCCTACCAAAAACGAATGGGAGTGCAAATGCAGCGATTCAAGTGATCCTCTTGT
TGTTGCCGCAAGTATCATTGGGATACTGCACTTGATATTGTGGATTCTTGATCGTCTTTTCTTCAAATGCATTTATCGTC
GCCTTAAATACGGTTTGAAAAGAGGGCCTTCTACGGAAGGGGTACCTGAGTCTATGAGGGAAGAGTATCGGCAGGAACAG
CAGAGTGCTGTGGATGTTGACGATGGTCATTTTGTCAACATAGAGCTGGAGTAA
>lcl|NC_007363.1_cds_YP_308671.1_10 [gene=M1] [locus_tag=FLUAVH5N1_s7gp2] [db_xref=GeneID:8656647] [protein=matrix protein 1] [protein_id=YP_308671.1] [location=26..784] [gbkey=CDS]
ATGAGTCTTCTAACCGAGGTCGAAACGTACGTTCTCTCTATCGTCCCGTCAGGCCCCCTCAAAGCCGAGATCGCGCAGAG
ACTTGAGGATGTCTTTGCAGGAAAGAACACCGATCTCGAGGCTCTCATGGAATGGCTAAAGACAAGACCAATCCTGTCAC
CTCTGACTAAAGGGATTTTAGGATTTGTGTTCACGCTCACCGTGCCCAGTGAGCGAGGACTGCAGCGTAGACGCTTTGTC
CAGAATGCCTTAAATGGAAATGGAGATCCAAACAATATGGATAGGGCAGTTAAGCTATACAAGAAGCTGAAAAGAGAAAT
AACATTCCATGGGGCTAAGGAGGTCGCACTCAGCTACTCAACCGGTGCACTTGCCAGTTGTATGGGTCTCATATACAACA
GGATGGGAACGGTGACCACAGAAGTGGCTTTTGGCCTAGTGTGTGCCACTTGTGAGCAGATTGCAGATTCACAGCATCGG
TCTCACAGACAGATGGCAACTACCACCAACCCACTAATCAGGCATGAGAACAGAATGGTGCTGGCCAGCACTACAGCTAA
GGCTATGGAGCAGATGGCTGGATCGAGTGAGCAGGCAGCGGAAGCCATGGAGGTTGCTAGTCAGGCTAGGCAGATGGTGC
AGGCAATGAGGACAATTGGGACTCATCCTAGCTCCAGTGCCGGTCTGAAAGATAATCTTCTTGAAAATTTGCAGGCCTAC
CAAAAACGAATGGGAGTGCAAATGCAGCGATTCAAGTGA
>lcl|NC_007364.1_cds_YP_308672.1_11 [gene=NEP] [locus_tag=FLUAVH5N1_s8gp1] [db_xref=GeneID:3654622] [protein=nonstructural protein 2] [protein_id=YP_308672.1] [location=join(15..44,517..852)] [gbkey=CDS]
ATGGATTCCAACACGATAACCTCGTTTCAGGACATTTTACAGAGGATGTCAAAAATGCAATTGGAATCCTCATCGGTGGA
CTTGAATGGAATGATAACTCAATTCGAGCGTCTGAAAATATACAGAGATTCGCTTGGGGAATCCATGATGAGAATGGGGG
ACCTTCACTCCCTCCAAAACAGAAACGCTACATGGCGAAACGAGTTGAGTCAGAAGTTTGAAGAGATCAGATGGCTCATT
GCTGAATGTAGAAATATACTGACAAAGACTGAAAATAGCTTTGAACAGATAACATTTTTGCAAGCATTGCAACTCTTACT
TGAAGTTGAGAGTGAGATAAGGACCTTCTCTTTTCAGCTTATTTAA
>lcl|NC_007364.1_cds_YP_308673.1_12 [gene=NS1] [locus_tag=FLUAVH5N1_s8gp2] [db_xref=GeneID:8656648] [protein=nonstructural protein 1] [protein_id=YP_308673.1] [location=15..707] [gbkey=CDS]
ATGGATTCCAACACGATAACCTCGTTTCAGGTAGATTGTTATCTATGGCACATAAGAAAGCTACTCAGTATGAGAGACAT
GTGTGATGCCCCCTTTGATGACAGGCTCCGAAGAGACCAAAAGGCATTAAAGGGAAGAGGCAGCACACTTGGACTCGATT
TAAGAGTGGCTACAATGGAGGGGAAAAAGATCGTTGAGGACATCCTGAAGAGTGAGACAAATGAAAACCTCAAAATAGCC
ATTGCTTCCAGTCCTGCTCCTCGGTATATCACCGATATGAGCATAGAGGAGATGAGCCGAGAATGGTACATGCTGATGCC
TAGGCAGAAAATAACTGGAGGCCTTATGGTGAAAATGGACCAAGCCATAATGGATAAAAGAATTATCCTTAAAGCAAATT
TCTCAGTTCTATTTGATCAACTAGAGACATTAGTCTCTCTGAGGGCATTCACAGAAAGTGGTGCTATTGTGGCTGAAATA
TTTCCCATTCCCTCCGTACCAGGACATTTTACAGAGGATGTCAAAAATGCAATTGGAATCCTCATCGGTGGACTTGAATG
GAATGATAACTCAATTCGAGCGTCTGAAAATATACAGAGATTCGCTTGGGGAATCCATGATGAGAATGGGGGACCTTCAC
TCCCTCCAAAACAGAAACGCTACATGGCGAAACGAGTTGAGTCAGAAGTTTGA
This diff is collapsed.
This diff is collapsed.
>NC_007357.1 Influenza A virus (A/Goose/Guangdong/1/96(H5N1)) polymerase (PB2) gene, complete cds
AGCAAAAGCAGGTCAATTATATTCAATATGGAAAGAATAAAAGAACTAAGAGATCTAATGTCGCAGTCCCGCACTCGCGA
GATACTAACAAAAACCACTGTGGATCATATGGCCATAATCAAGAAATACACATCAGGAAGACAAGAGAAGAACCCTGCTC
TCAGAATGAAATGGATGATGGCAATGAAATATCCAATCACAGCAGACAAGAGAATAATGGAGATGATTCCTGAAAGGAAT
GAGCAAGGACAAACGCTTTGGAGCAAGACAAATGATGCTGGGTCGGACAGAGTGATGGTGTCTCCCCTAGCTGTAACTTG
GTGGAACAGGAATGGGCCGACAACAAGTACAGTCCATTATCCAAAGGTTTACAAAACATACTTTGAGAAGGTTGAAAGGT
TAAAACATGGAACCTTCGGTCCCGTTCATTTCCGAAACCAAGTTAAAATACGTCGCCGGGTGGATATAAACCCGGGCCAT
GCAGATCTCAGTGCTAAAGAAGCACAAGATGTTATCATGGAGGTCGTTTTCCCAAATGAAGTGGGAGCTAGAATATTGAC
ATCAGAGTCGCAATTGACAATAACAAAAGAGAAGAAAGAAGAGCTCCAGGATTGTAAAATTGCTCCTTTAATGGTGGCAT
ACATGTTGGAAAGAGAACTGGTCCGCAAAACCAGATTTCTACCGGTAGCAGGCGGAACAAGCAGTGTGTACATTGAGGTA
TTGCATTTGACTCAAGGGACCTGTTGGGAACAGATGTACACTCCCGGCGGAGAAGTAAGAAATGATGATGTTGACCAGAG
TTTGATCATCGCTGCCAGAAACATTGTTAGGAGAGCAACAGTATCAGCGGACCCACTGGCATCACTCTTGGAGATGTGTC
ACAGCACACAAATTGGGGGAATAAGGATGGTGGACATCCTTAGGCAAAACCCAACTGAGGAGCAAGCTGTGGATATATGC
AAAGCAGCAATGGGTTTGAGGATCAGTTCATCCTTTAGCTTTGGAGGCTTCACTTTCAAAAGAACAAATGGATCATCCGT
CAAGAAGGAAGAGGAAGTGCTTACAGGCAACCTCCAAACATTGAAAATAAAAGTACATGAGGGGTATGAAGAATTCACAA
TGGTTGGGCGGAGAGCAACAGCTATCCTGAGGAAAGCAACTAGAAGGCTGATTCAGTTGATAGTAAGTGGAAGAGATGAA
CAATCAATCGCTGAAGCGATCATTGTAGCAATGGTGTTCTCACAGGAGGATTGCATGATAAAGGCAGTCCGAGGCGATCT
GAATTTCGTGAACAGAGCAAACCAAAGATTGAACCCCATGCATCAACTCCTGAGGCACTTCCAAAAAGATGCAAAAGTGC
TGTTTCAGAACTGGGGAATTGAACCTATTGACAATGTCATGGGGATGATCGGAATATTACCTGACATGACTCCAAGCGCA
GAGATGTCACTGAGAGGAGTGAGAGTTAGTAAGATGGGAGTAGATGAATATTCCAGCACGGAGAGAGTGGTGGTGAGTAT
TGACCGTTTCTTGAGGGTCCGAGATCAGCAGGGGAACGTACTCTTATCTCCTGAAGAGGTTAGTGAAACACAGGGAACAG
AGAAGTTGACAATAACATATTCATCCTCAATGATGTGGGAAATCAACGGTCCTGAGTCAGTGCTTGTTAACACTTATCAA
TGGATCATCAGGAATTGGGAGACTGTAAAGATTCAATGGTCTCAAGATCCCACAATGCTGTACAATAAGATGGAGTTTGA
ATCGTTCCAATCCTTGGTGCCAAAGGCTGCCAGAAGCCAATATAGTGGATTTGTGAGAACACTATTCCAACAGATGCGTG
ATGTTTTGGGGACATTTGATACTGTCCAAATAATCAAGCTGCTACCATTTGCAGCAGCCCCACCGGAGCCGAGCAGAATG
CAGTTTTCTTCTCTAACTGTGAATGTGAGAGGCTCAGGAATGAGAATACTCGTGAGGGGTAACTCCCCCGTGTTCAACTA
CAACAAGGCAACCAAAAGGCTTACAGTCCTCGGAAAGGACGCAGGTGCATTAACAGAAGATCCAGACGAGGGAACAGCCG
GGGTGGAATCTGCAGTATTGAGGGGATTCCTAATTCTAGGCAGAGAGGACAAAAGATATGGACCCGCATTGAGCATCAAT
GAACTGAGCAATCTTGCAAAAGGGGAGAAGGCTAATGTATTGATAATGCAAGGAGACGTGGTGTTGGTAATGAAACGGAA
ACGGGACTTTAGCATACTTACTGACAGCCAGACAGCGACCAAAAGAATTCGGATGGCCATCAATTAGTGTTGAATAGTTT
AAAAACGACCTTGTTTCTACT
>NC_007358.1 Influenza A virus (A/goose/Guangdong/1/1996(H5N1)) polymerase (PB1) and PB1-F2 protein (PB1-F2) genes, complete cds
AGCAAAAGCAGGCAAACCATTTGAATGGATGTCAATCCGACTTTACTTTTCTTAAAAGTGCCAGCGCAAAATGCTATAAG
TACCACATTCCCTTATACTGGAGATCCTCCATACAGCCATGGAACAGGAACAGGATACACCATGGACACAGTCAACAGAA
CACATCAATATTCAGAAAAGGGGAAATGGACAACGAACACAGAGACTGGAGCACCCCAACTCAATCCGATTGATGGACCA
CTACCTGAGGATAATGAGCCGAGTGGGTATGCACAAACAGATTGTGTATTGGAAGCAATGGCTTTCCTTGAAGAATCCCA
CCCAGGGATCTTTGAAAACTCGTGTCTTGAAACGATGGAAGTTGTTCAGCAAACAAGAGTGGATAAGCTGACCCAAGGTC
GCCAAACCTATGACTGGACATTGAAAAGAAACCAGCCGGCTGCAACCGCTTTGGCCAACACTATAGAGGTCTTCAGATCG
AATGGTCTAACAGCCAATGAATCGGGAAGGCTAATAGATTTCCTCAAAGACGTGATGGAATCAATGGATAAGGGAGAAAT
GGAAATAATAACACATTTCCAGAGAAAGAGAAGAGTGAGGGACAACATGACCAAGAAAATGGTCACACAAAGAACAATAG
GGAAGAAAAAACAAAGGCTGAACAAAAGGAGCTACCTAATAAGAGCACTGACACTGAACACAATGACAAAAGACGCAGAA
AGAGGCAAATTGAAGAGGCGGGCAATTGCAACACCCGGGATGCAAATCAGAGGATTCGTGTACTTTGTCGAAACACTAGC
GAGGAGTATCTGTGAGAAACTTGAGCAATCTGGACTCCCCGTCGGAGGGAATGAAAAGAAGGCTAAATTGGCAAATGTCG
TGAGGAAGATGATGACTAACTCACAAGATACAGAGCTCTCTTTTACAATTACTGGAGACAACACCAAATGGAATGAGAAT
CAGAACCCTCGGATGTTTCTAGCAATGATAACATACATCACAAGGAACCAACCTGAATGGTTTAGAAATGTCTTAAGCAT
TGCTCCTATAATGTTCTCAAACAAGATGGCAAGATTAGGGAAAGGATACATGTTCGAAAGTAAGAGCATGAAGCTACGGA
CACAAATACCAGCAGAAATGCTTGCAAGCATTGACTTGAAATACTTCAACGAATCAACGAGAAAGAAAATCGAGAAAATA
AGACCTCTACTAATAGATGGCACAGCCTCATTGAGTCCTGGAATGATGATGGGCATGTTCAATATGCTGAGTACAGTCTT
AGGAGTTTCAATCCTGAATCTTGGGCAGAAGAGGTACACCAAAACCACATACTGGTGGGACGGACTCCAATCCTCTGATG
ATTTCGCTCTCATAGTGAATGCACCAAATCATGAGGGAATAGAAGCAGGGGTGGATAGGTTCTATAGGACTTGCAAACTA
GTTGGAATCAATATGACCAAGAAGAAGTCTTACATAAATCGGACAGGAACATGTGAATTCACAAGCTTCTTCTACCGCTA
TGGGTTCGTAGCCAACTTCAGTATGGAGCTGCCCAGCTTTGGAGTGTCTGGGATTAATGAATCGGCTGACATGAGCATTG
GTGTTACAGTGATAAAGAACAATATGATGGACAACGACCTTGGACCAGCAACAGCTCAGATGGCTCTTCAGCTATTCATT
AAGGACTACAGATACCCATACCGATGCCACAGGGGGGATACACAAATCCAAACGAGGAGATCATTCGAGCTGAAGAAGCT
GTGGGAGCAGACCCGCTCAAAGGCAGGACTGTTGGTTTCAGATGGAGGACCAAACCCATACAATATCCGGAATCTCCACA
TTCCGGAGGCTGGCTTGAAGTGGGAATTGATGGATGAAGACTACCAGGGCAGACTGTGTAATCCTCTGAACCCGTTTGTT
AGTCATAAGGAAATTGAGTCTGTCAACAATGCTGTGGTAATGCCAGCTCATGGCCCAGCCAAGAGCATGGAATATGATGC
AGTTGCGACTACACATTCATGGATTCCCAAGAGGAATCGTTCCATTCTCAACACCAGCCAAAGGGGGATTCTTGAGGATG
AACAGATGTATCAGAAGTGCTGCAATCTATTCGAGAAATTCTTCCCTAGCAGTTCATATCGGAGGCCAGTTGGAATTTCC
AGCATGGTGGAGGCCATGGTGTCTAGGGCCCGAATTGATGCACGAATTGACTTCGAGTCTGGAAGGATTAAGAAAGAAGA
GTTTGCTGAGATCATGAAGATCTGTTCCACCATTGAAGAGCTCGGACGGCAAAAATAGTGAATTTAGCTTGTCCTTCATG
AAAAAATGCCTTGTTTCTACT
>NC_007359.1 Influenza A virus (A/goose/Guangdong/1/1996(H5N1)) polymerase (PA) and PA-X protein (PA-X) genes, complete cds
AGCAAAAGCAGGTACTGATCCAAAATGGAAGACTTTGTGCGACAATGCTTCAATCCAATGATTGTCGAGCTTGCGGAAAA
GGCAATGAAAGAATATGGGGAAGATCCGAAAATCGAAACGAACAAATTTGCCGCAATATGCACGCACTTAGAAGTCTGTT
TCATGTATTCAGATTTCCACTTTATTGATGAACGGGGCGAATCAACAATTATAGAATCTGGCGATCCCAATGCATTATTG
AAACACCGGTTTGAAATAATCGAAGGGAGGGACCGAACAATGGCCTGGACAGTGGTGAATAGTATCTGCAACACCACAGG
AGTTGAGAAGCCTAAATTTCTCCCAGATTTGTATGACTACAAGGAGAACCGATTTATTGAAATTGGAGTGACACGGAGGG
AAGTTCACACATACTATCTAGAAAAAGCCAACAAGATAAAATCTGAGAAGACACACATTCACATATTCTCATTCACTGGA
GAGGAAATGGCCACCAAAGCGGACTACACCCTTGATGAAGAAAGCAGGGCCCGAATCAAAACCAGGCTGTTCACTATAAG
GCAGGAAATGGCCAGTAGGGGTTTATGGGATTCCTTTCGTCAGTCCGAGAGAGGCGAAGAGACAGTTGAAGAAAGATTTG
AAATCACAGGGACTATGTGCAGGCTTGCCGACCAAAGTCTCCCACCTAATTTCTCCAGCCTTGAAAAATTTAGAGCCTAT
GTGGATGGATTCGAACCGAACGGCTGCATTGAGGGCAAGCTTTCTCAAATGTCGAAAGAAGTAAACGCCAGAATTGAGCC
ATTTCTGAAGACAACACCACGCCCTCTTAGATTACCTGATGGGCCTCCCTGCTCTCAGCGGTCGAAGTTTTTGCTGATGG
ATGCCCTTAAATTAAGCATCGAAGACCCGAGTCATGAGGGGGAGGGGATACCGCTATATGATGCAATCAAATGCATGAAA
ACATTTTTCGGCTGGAAAGAGCCCAACATTGTAAAACCACATGAAAAAGGCATAAACCCCAATTACCTCCTGGCTTGGAA
GCAGGTGCTGGCAGAGCTCCAAGATATTGAAAACGAGGAGAAAATTCCAAAGACAAAGAACATGAGGAAAACAAGCCAAT
TGAAGTGGGCACTTGGTGAGAATATGGCACCAGAGAAAGTAGACTTTGAGGATTGCAAAGATGTTAGCGATCTAAGGCAG
TATGACAGTGATGAACCAAAGCCTAGATCACTAGCAAGCTGGATCCAGAGTGAATTCAACAAGGCATGCGAATTGACAGA
TTCAAGTTGGATTGAACTTGATGAAATAGGGGAAGACGTTGCTCCAATTGAGCACATTGCAAGTATGAGAAGGAACTATT
TCACAGCGGAAGTATCCCATTGCAGGGCTACTGAATACATAATGAAGGGAGTGTACATAAACACAGCTTTGTTGAATGCA
TCCTGTGCAGCCATGGATGACTTCCAACTGATCCCAATGATAAGCAAATGCAGAACCAAAGAAGGAAGACGGAAAACTAA
CCTGTATGGATTCCTTATAAAAGGAAGATCCCATTTGAGAAATGACACCGATGTGGTAAACTTTGTGAGTATGGAATTCT
CTCTTACTGATCCGAGGCTGGAGCCACACAGATGGGAAAAGTACTGCGTTCTTCGGATAGGAGACATGCTCTTACGGACT
GAAATAGGCCAAGTGTCAAGGCCCATGTTTCTTTATGTGAGAACCAATGGAACCTCCAAGATCAAGATGAAATGGGGCAT
GGAAATGAGGCGATGCCCTTTTCAATCCCTTCAACAGATTGAGAGCATGATTGAGGCCGAGTCTTCTGTCAAAGAAAAAG
ACATGACTAAAGAATTCTTTGAAAACAAATCAGAAACATGGCCAATTGGAGAATCACCCAAGGGAGTGGAGGAAGGCTCC
ATCGGGAAGGTGTGCAGAACCTTACTGGCTAAATCTGTTTTCAACAGTCTATATGCATCTCCACAACTCGAGGGGTTTTC
AGCTGAATCAAGAAAATTGCTTCTCATTGTTCAGGCACTTAGGGACAACCTGGAACCTGGAACCTTCGATCTTGGGGGGC
TATATGAAGCAATTGAGGAGTGCCTGATTAATGATCCCTGGGTTTTGCTTAATGCATCTTGGTTCAACTCCTTCCTCACA
CATGCACTAAGATAGTTGTGGCAATGCTACTATTTGCTATCCATACTGTCCAAAAAAGTACCTTGTTTCTACT
>NC_007362.1 Influenza A virus (A/goose/Guangdong/1/1996(H5N1)) hemagglutinin (HA) gene, complete cds
GCAGGGGTATAATCTGTCAAAATGGAGAAAATAGTGCTTCTTCTTGCAATAGTCAGTCTTGTCAAAAGTGATCAGATTTG
CATTGGTTACCATGCAAACAACTCGACAGAGCAGGTTGACACAATAATGGAAAAGAACGTTACTGTTACACATGCCCAAG
ACATACTGGAAAAGACACACAATGGGAAGCTCTGCGATCTAAATGGAGTGAAGCCTCTCATTTTGAGAGATTGTAGTGTA
GCTGGATGGCTCCTCGGAAACCCTATGTGTGACGAATTCATCAATGTGCCGGAATGGTCTTACATAGTGGAGAAGGCCAG
TCCAGCCAATGACCTCTGTTACCCAGGGGATTTCAACGACTATGAAGAACTGAAACACCTATTGAGCAGAACAAACCATT
TTGAGAAAATTCAGATCATCCCCAAAAGTTCTTGGTCCAATCATGATGCCTCATCAGGGGTGAGCTCAGCATGTCCATAC
CATGGGAGGTCCTCCTTTTTCAGAAATGTGGTATGGCTTATCAAAAAGAACAGTGCATACCCAACAATAAAGAGGAGCTA
CAATAATACCAACCAAGAAGATCTTTTAGTACTGTGGGGGATTCACCATCCTAATGATGCGGCAGAGCAGACAAAGCTCT
ATCAAAACCCAACCACTTACATTTCCGTTGGAACATCAACACTGAACCAGAGATTGGTTCCAGAAATAGCTACTAGACCC
AAAGTAAACGGGCAAAGTGGAAGAATGGAGTTCTTCTGGACAATTTTAAAGCCGAATGATGCCATCAATTTCGAGAGTAA
TGGAAATTTCATTGCTCCAGAATATGCATACAAAATTGTCAAGAAAGGGGACTCAGCAATTATGAAAAGTGAATTGGAAT
ATGGTAACTGCAACACCAAGTGTCAAACTCCAATGGGGGCGATAAACTCTAGTATGCCATTCCACAACATACACCCCCTC
ACCATCGGGGAATGCCCCAAATATGTGAAATCAAACAGATTAGTCCTTGCGACTGGACTCAGAAATACCCCTCAGAGAGA
GAGAAGAAGAAAAAAGAGAGGACTATTTGGAGCTATAGCAGGTTTTATAGAGGGAGGATGGCAGGGAATGGTAGATGGTT
GGTATGGGTACCACCATAGCAATGAGCAGGGGAGTGGATACGCTGCAGACAAAGAATCCACTCAAAAGGCAATAGATGGA
GTCACCAATAAGGTCAACTCGATCATTGACAAAATGAACACTCAGTTTGAGGCCGTTGGAAGGGAATTTAATAACTTGGA
AAGGAGGATAGAGAATTTAAACAAGCAGATGGAAGACGGATTCCTAGATGTCTGGACTTATAATGCTGAACTTCTGGTTC
TCATGGAAAATGAGAGAACTCTAGACTTTCATGACTCAAATGTCAAGAACCTTTATGACAAGGTCCGACTACAGCTTAGG
GATAATGCAAAGGAGCTGGGTAATGGTTGTTTCGAGTTCTATCACAAATGTGATAATGAATGTATGGAAAGTGTAAAAAA
CGGAACGTATGACTACCCGCAGTATTCAGAAGAAGCAAGACTAAACAGAGAGGAAATAAGTGGAGTAAAATTGGAATCAA
TGGGAACTTACCAAATACTGTCAATTTATTCAACAGTGGCGAGTTCCCTAGCACTGGCAATCATGGTAGCTGGTCTATCT
TTATGGATGTGCTCCAATGGATCGTTACAATGCAGAATTTGCATTTAAATTTGTGAGTTCAGATTGTAGTTAAAAACACC
>NC_007360.1 Influenza A virus (A/Goose/Guangdong/1/96(H5N1)) nucleocapsid protein (NP) gene, complete cds
AGCAAAAGCAGGGTAGATAATCACTCACTGAGTGACATCAACATCATGGCGTCTCAGGGCACCAAACGATCTTATGAACA
GATGGAAACTGGTGGAGAACGCCAGAATGCTACTGAGATCAGAGCATCTGTTGGAAGAATGGTTGGTGGAATTGGGAGGT
TTTATATACAGATGTGCACTGAACTCAAACTCAGCGACTATGAAGGAAGGCTGATTCAGAACAGCATAACAATAGAGAGA
ATGGTTCTCTCTGCATTTGATGAAAGGAGGAACAAATACCTGGAAGAACATCCCAGTGCGGGGAAGGACCCAAAGAAAAC
TGGAGGTCCAATCTACCGAAGAAGAGACGGAAAATGGGTGAGAGAGCTGATTCTGTATGACAAAGAGGAGATCAGGAGAA
TTTGGCGTCAAGCGAACAATGGAGAAGATGCAACTGCTGGTCTCACTCACATGATGATCTGGCATTCCAATCTAAATGAT
GCCACATACCAGAGAACAAGAGCTCTCGTGCGTACTGGGATGGACCCTAGAATGTGCTCTCTGATGCAAGGATCAACTCT
CCCGAGGAGATCTGGAGCTGCTGGTGCGGCAGTAAAGGGAGTCGGAACGATGGTGATGGAACTAATTCGGATGATAAAGC
GAGGGATTAACGATCGGAATTTCTGGAGAGGTGAAAATGGGCGAAGAACAAGAATTGCATATGAGAGAATGTGCAACATC
CTCAAAGGGAAATTCCAAACAGCAGCACAAAGAGCAATGATGGATCAGGTACGGGAAAGCAGAAATCCTGGGAATGCTGA
GATTGAAGATCTCATATTTCTGGCACGGTCTGCACTCATCCTGAGAGGATCAGTGGCCCACAAGTCCTGCTTGCCTGCTT
GTGTGTACGGGCTTGCCGTGGCCAGTGGATATGACTTTGAGAGAGAAGGGTACTCTCTGGTCGGGATTGATCCTTTCCGT
CTGCTGCAAAACAGCCAGGTCTTTAGTCTAATTAGACCAAATGAGAATCCAGCACATAAAAGTCAATTGGTGTGGATGGC
ATGCCATTCTGCAGCATTTGAAGATCTGAGAGTCTCAAGCTTCATCAGAGGGACAAGAGTGGCCCCAAGGGGACAACTAT
CTACTAGAGGAGTTCAAATTGCTTCAAATGAGAACATGGAAACAATGGACTCCAGCACTCTTGAACTGAGAAGCAGATAT
TGGGCTATAAGGACCAGGAGTGGAGGAAACACCAACCAGCAGAGAGCATCTGCAGGACAAATCAGTGTGCAGCCTACTTT
CTCGGTACAGAGAAATCTTCCCTTCGAAAGAGCGACCATTATGGCGGCATTCACAGGGAATACAGAGGGCAGAACATCTG
ACATGAGGACTGAAATCATAAGGATGATGGAAAGCTCCAGACCAGAAGATGTGTCTTTCCAGGGGCGGGGAGTCTTCGAG
CTCTCGGACGAAAAGGCAACGAACCCGATCGTGCCTTCCTTTGACATGAGTAATGAAGGATCTTATTTCTTCGGAGACAA
TGCAGAGGAATATGACAATTGAAGAAAAATACCCTTGTTTCTACT
>NC_007361.1 Influenza A virus (A/Goose/Guangdong/1/96(H5N1)) neuraminidase (NA) gene, complete cds
AGCAAAAGCAGGAGATTAAAATGAATCCAAATCAGAAGATAATAACCATTGGATCAATCTGTATGGTAGTTGGGATAATT
AGCTTGATGTTACAAATTGGGAACATAATCTCAATATGGGTCAGTCATTCAATTCAGACAGGGAATCAACACCAAGCTGA
ACCATGCAATCAAAGCATTATTACTTATGAAAACAACACCTGGGTAAATCAAACATATGTCAACATCAGCAATACCAATT
TTCTTACTGAAAAAGCTGTGGCTTCAGTAACATTAGCGGGCAATTCATCTCTTTGCCCCATTAGCGGATGGGCTGTACAC
AGTAAGGACAACGGTATAAGAATCGGTTCCAAGGGGGATGTGTTTGTTATAAGAGAGCCGTTCATCTCATGCTCCCACTT
GGAATGCAGAACTTTCTTTTTGACTCAGGGAGCCTTGCTGAATGACAAGCACTCCAATGGGACCGTCAAAGACAGAAGCC
CTCACAGAACATTGATGAGTTGTCCTGTGGGTGAGGCTCCCTCCCCATATAACTCAAGGTTTGAGTCTGTTGCTTGGTCG
GCAAGTGCTTGCCATGATGGCACCAGTTGGTTGACAATTGGAATTTCTGGCCCAGACAATGGGGCTGTGGCTGTATTGAA
ATACAACGGCATAATAACAGACACTATCAAGAGTTGGAGGAACAACATACTGAGAACTCAAGAGTCTGAATGTGCATGTG
TAAATGGCTCTTGCTTTACTGTAATGACTGACGGACCAAGTAATGGGCAGGCCTCATATAAGATCTTCAAAATGGAAAAA
GGGAAAGTAGTTAAATCAGTCGAATTGAATGCCCCTAATTATCACTATGAGGAGTGCTCCTGTTATCCTGATGCTGGCGA
AATCACATGTGTGTGCAGGGATAATTGGCATGGCTCAAATCGGCCATGGGTATCTTTCAATCAAAATTTGGAGTATCAAA
TAGGATATATATGCAGTGGAGTTTTCGGAGACAATCCACGCCCCAATGATGGAACAGGCAGTTGTGGTCCGGTGTCCCCT
AACGGGGCATATGGAGTAAAAGGGTTTTCATTTAAATACGGCAATGGTGTTTGGATCGGGAGAACCAAAAGCACTAATTC
CAGGAGCGGCTTTGAAATGATTTGGGATCCAAATGGGTGGACTGGAACGGACAGTAGCTTCTCGGTGAAACAAGATATCG
TAGCAATAACTGATTGGTCAGGATATAGCGGGAGTTTTGTCCAGCATCCAGAACTGACAGGATTAGATTGCATAAGACCT
TGTTTCTGGGTTGAGCTAATCAGAGGGCGGCCCAAAGAGAGCACAATTTGGACTAGTGGGAGCAGCATATCTTTTTGTGG
TGTAAATAGTGACACTGTGGGTTGGTCTTGGCCAGACGATGCCGAGTTGCCATTCACCATTGACAAGTAGTTTGTTCAAA
AAACTCCTTGTTTCTACT
>NC_007363.1 Influenza A virus (A/goose/Guangdong/1/1996(H5N1)) segment 7, complete sequence
AGCAAAAGCAGGTAGATATTGAAAAATGAGTCTTCTAACCGAGGTCGAAACGTACGTTCTCTCTATCGTCCCGTCAGGCC
CCCTCAAAGCCGAGATCGCGCAGAGACTTGAGGATGTCTTTGCAGGAAAGAACACCGATCTCGAGGCTCTCATGGAATGG
CTAAAGACAAGACCAATCCTGTCACCTCTGACTAAAGGGATTTTAGGATTTGTGTTCACGCTCACCGTGCCCAGTGAGCG
AGGACTGCAGCGTAGACGCTTTGTCCAGAATGCCTTAAATGGAAATGGAGATCCAAACAATATGGATAGGGCAGTTAAGC
TATACAAGAAGCTGAAAAGAGAAATAACATTCCATGGGGCTAAGGAGGTCGCACTCAGCTACTCAACCGGTGCACTTGCC
AGTTGTATGGGTCTCATATACAACAGGATGGGAACGGTGACCACAGAAGTGGCTTTTGGCCTAGTGTGTGCCACTTGTGA
GCAGATTGCAGATTCACAGCATCGGTCTCACAGACAGATGGCAACTACCACCAACCCACTAATCAGGCATGAGAACAGAA
TGGTGCTGGCCAGCACTACAGCTAAGGCTATGGAGCAGATGGCTGGATCGAGTGAGCAGGCAGCGGAAGCCATGGAGGTT
GCTAGTCAGGCTAGGCAGATGGTGCAGGCAATGAGGACAATTGGGACTCATCCTAGCTCCAGTGCCGGTCTGAAAGATAA
TCTTCTTGAAAATTTGCAGGCCTACCAAAAACGAATGGGAGTGCAAATGCAGCGATTCAAGTGATCCTCTTGTTGTTGCC
GCAAGTATCATTGGGATACTGCACTTGATATTGTGGATTCTTGATCGTCTTTTCTTCAAATGCATTTATCGTCGCCTTAA
ATACGGTTTGAAAAGAGGGCCTTCTACGGAAGGGGTACCTGAGTCTATGAGGGAAGAGTATCGGCAGGAACAGCAGAGTG
CTGTGGATGTTGACGATGGTCATTTTGTCAACATAGAGCTGGAGTAAAAAACTACCTTGTTTCTACT
>NC_007364.1 Influenza A virus (A/goose/Guangdong/1/1996(H5N1)) segment 8, complete sequence
GTGACAAAGACATAATGGATTCCAACACGATAACCTCGTTTCAGGTAGATTGTTATCTATGGCACATAAGAAAGCTACTC
AGTATGAGAGACATGTGTGATGCCCCCTTTGATGACAGGCTCCGAAGAGACCAAAAGGCATTAAAGGGAAGAGGCAGCAC
ACTTGGACTCGATTTAAGAGTGGCTACAATGGAGGGGAAAAAGATCGTTGAGGACATCCTGAAGAGTGAGACAAATGAAA
ACCTCAAAATAGCCATTGCTTCCAGTCCTGCTCCTCGGTATATCACCGATATGAGCATAGAGGAGATGAGCCGAGAATGG
TACATGCTGATGCCTAGGCAGAAAATAACTGGAGGCCTTATGGTGAAAATGGACCAAGCCATAATGGATAAAAGAATTAT
CCTTAAAGCAAATTTCTCAGTTCTATTTGATCAACTAGAGACATTAGTCTCTCTGAGGGCATTCACAGAAAGTGGTGCTA
TTGTGGCTGAAATATTTCCCATTCCCTCCGTACCAGGACATTTTACAGAGGATGTCAAAAATGCAATTGGAATCCTCATC
GGTGGACTTGAATGGAATGATAACTCAATTCGAGCGTCTGAAAATATACAGAGATTCGCTTGGGGAATCCATGATGAGAA
TGGGGGACCTTCACTCCCTCCAAAACAGAAACGCTACATGGCGAAACGAGTTGAGTCAGAAGTTTGAAGAGATCAGATGG
CTCATTGCTGAATGTAGAAATATACTGACAAAGACTGAAAATAGCTTTGAACAGATAACATTTTTGCAAGCATTGCAACT
CTTACTTGAAGTTGAGAGTGAGATAAGGACCTTCTCTTTTCAGCTTATTTAATACTAAAAAACAC
>lcl|NC_007357.1_cds_YP_308664.1_1 [gene=PB2] [locus_tag=FLUAVH5N1_s1gp1] [db_xref=GeneID:3654615] [protein=polymerase] [protein_id=YP_308664.1] [location=28..2307] [gbkey=CDS]
ATGGAAAGAATAAAAGAACTAAGAGATCTAATGTCGCAGTCCCGCACTCGCGAGATACTAACAAAAACCACTGTGGATCA
TATGGCCATAATCAAGAAATACACATCAGGAAGACAAGAGAAGAACCCTGCTCTCAGAATGAAATGGATGATGGCAATGA
AATATCCAATCACAGCAGACAAGAGAATAATGGAGATGATTCCTGAAAGGAATGAGCAAGGACAAACGCTTTGGAGCAAG
kmc.md 0 → 100644
Résultats de KMC avec des k-mers de taille 27 :
Stage 1: 100%
Stage 2: 100%
1st stage: 12.2781s
2nd stage: 24.6005s
Total : 36.8785s
Tmp size : 1809MB
Stats:
No. of k-mers below min. threshold : 243017427
No. of k-mers above max. threshold : 0
No. of unique k-mers : 315431169
No. of unique counted k-mers : 72413742
Total no. of k-mers : 1644690745
Total no. of reads : 28282964
Total no. of super-k-mers : 186607305
main.py 0 → 100644
import gzip
import re
from IOalignment import IOAlignment
from Bio import SeqIO
from Bio.Seq import Seq
import parasail
# Header of the reference record of interest. It is only referenced by the
# commented-out experiments kept below.
label = "NC_007357.1 Influenza A virus (A/Goose/Guangdong/1/96(H5N1)) polymerase (PB2) gene, complete cds"
# seqs = ParseFasta("data/grippe.fasta")
# seq = seqs[label]
# print([i for i in range(len(seq))])
# for record in SeqIO.parse("example.fasta", "fasta"):
#     print(record.id)
# for line in file.readlines():
#     print(line)

# Upper bound on the number of reads loaded from the FASTQ archive.
MAX_READS = 1000

# Load the reads: maps the 1-based record index to the read sequence (str).
reads = {}
with gzip.open('SRR10971381_1.fastq.gz', 'rt') as fastq:
    for i, record in enumerate(SeqIO.parse(fastq, 'fastq'), start=1):
        reads[i] = str(record.seq)
        # enumerate() already counts the records, so no separate counter is
        # needed to stop after MAX_READS reads.
        if i >= MAX_READS:
            break

# Load the reference sequences as a dict {FASTA label: sequence}.
# NOTE(review): the original comment says "Influenza A" but the path points to
# 'data/covid.fna' -- confirm which reference file is intended.
genes_grippe = IOAlignment('data/covid.fna').ParseFasta()  # long ID => keep the whole label as key?
# label = 'lcl|NC_007357.1_cds_YP_308664.1_1 [gene=PB2] [locus_tag=FLUAVH5N1_s1gp1] [db_xref=GeneID:3654615] [protein=polymerase] [protein_id=YP_308664.1] [location=28..2307] [gbkey=CDS]'
# genome_grippe = genomes_grippe[label]
# genome="BGAABCGHJ"
def split_kmers(genome, k):
    """Split ``genome`` into consecutive, non-overlapping k-mers of length ``k``.

    Args:
        genome: Sequence to cut (anything that supports ``len`` and slicing,
            e.g. a string).
        k: Length of each k-mer; must be a positive integer.

    Returns:
        A list with the full-length k-mers in their order of appearance. A
        trailing remainder shorter than ``k`` is dropped, so the list is empty
        when ``genome`` is shorter than ``k`` (including the empty sequence).

    Raises:
        ValueError: If ``k`` is not positive.
    """
    if k <= 0:
        raise ValueError("k must be a positive integer")
    # Stopping at len(genome) - k + 1 only yields start positions that still
    # have k items after them, so no short trailing chunk is ever produced.
    return [genome[i:i + k] for i in range(0, len(genome) - k + 1, k)]
def create_suffix_table(genome):
    """Build the table of suffix start positions of ``genome`` in sorted order.

    Args:
        genome: Sequence whose suffixes are ranked (e.g. a string).

    Returns:
        A list containing every index ``0 .. len(genome) - 1`` ordered by the
        suffix ``genome[i:]`` that starts at that index. Empty for an empty
        genome.
    """
    # The key slices the ``genome`` argument itself rather than a module-level
    # variable, so the result only depends on what the caller passed in.
    return sorted(range(len(genome)), key=lambda i: genome[i:])
def get_suffix(i):
    """Return the suffix starting at position ``i`` of the current gene.

    The sequence is not passed in: it is read from the module-level name
    ``gene_grippe``, which therefore has to be bound before this is called.
    """
    suffix = gene_grippe[i:]
    return suffix
def search_suffix(genome, suffix_table, kmer, i1, i2):
    """Binary-search ``suffix_table`` for a suffix of ``genome`` starting with ``kmer``.

    Args:
        genome: The sequence (string) the suffix table was built from.
        suffix_table: Start positions of the suffixes of ``genome``, sorted by
            suffix.
        kmer: The word to look for.
        i1: First table index of the search range (inclusive).
        i2: End table index of the search range (exclusive).

    Returns:
        The position in ``genome`` of one occurrence of ``kmer`` (the first
        one the bisection lands on, not necessarily the leftmost), or ``-1``
        when no suffix in the range starts with ``kmer``.
    """
    lo, hi = i1, i2
    width = len(kmer)
    while lo < hi:
        mid = (lo + hi) // 2
        pos = suffix_table[mid]
        # startswith() with a start offset tests the suffix without copying it.
        if genome.startswith(kmer, pos):
            return pos
        # Once kmer is known not to be a prefix of the suffix, the ordering is
        # decided within the first len(kmer) characters, so comparing against
        # that window gives the same answer as comparing the whole suffix.
        if kmer > genome[pos:pos + width]:
            lo = mid + 1
        else:
            hi = mid
    return -1
# print(split_kmers(reads[1], 10))

# Length of the seeds (k-mers) cut out of every read.
KMER_SIZE = 11

# Build every gene's suffix table once, instead of once per (read, gene) pair:
# the table only depends on the gene.
# The loop variable is deliberately the module-level name ``gene_grippe``:
# get_suffix() reads that global while the table is being sorted.
suffix_tables = {}
for gene_label, gene_grippe in genes_grippe.items():
    suffix_tables[gene_label] = create_suffix_table(gene_grippe)

# Seed and extend: look every k-mer of every read up in every gene and, on a
# hit, align the read against the gene region around the hit.
for read in reads.values():
    kmers = split_kmers(read, KMER_SIZE)
    for gene_label, gene_grippe in genes_grippe.items():
        suffix_table = suffix_tables[gene_label]
        for kmer in kmers:
            pos = search_suffix(gene_grippe, suffix_table, kmer, 0, len(suffix_table))
            if pos == -1:
                continue
            # Region of the gene spanning one read length on each side of the
            # hit. The start is clamped at 0 because a negative slice start
            # would wrap around to the end of the gene; the end needs no
            # clamping because slicing past the end is safe, and clamping it
            # to len - 1 would drop the last base.
            start = max(pos - len(read), 0)
            end = pos + len(read)
            window = gene_grippe[start:end]
            # Semi-global alignment of the read against the gene window; the
            # score is normalised by the read length.
            al = parasail.sg_dx_scan_sat(read, window, 16, 4, parasail.dnafull)
            print(al.score / len(read))
            # NOTE(review): this second call swaps the argument order and uses
            # different gap penalties (5, 1) than the scoring call above
            # (16, 4) -- confirm this is intended.
            al = parasail.sg_dx_trace_scan_sat(window, read, 5, 1, parasail.dnafull)
            traceback = al.get_traceback()
            print(traceback.ref)
            print("--------------------------")
            print(traceback.query)
#al=parasail.sg_dx_trace_scan_sat("TGTTGCATCTTCAGCTAGTCTGGAGCAA", genome[m]", 5,1,parasail.dnafull)
#traceback=al.get_traceback()
#print(traceback.ref+""+traceback.query)
\ No newline at end of file
notes.py 0 → 100644
# Goal: compare the reads (human, but not only) against the (virus) genome.
# Final output: separate the reads that come from the genome from those that do not.
# Suffix table => built on the virus genome => stored as a series of positions => do not store the suffixes themselves.
# Split each read into k-mers => look up whether each k-mer occurs in the virus genome.
# Strategy: seed -> extend.
# Reading the files: Biopython.
# Alignment: Parasail.
from IOalignment import IOAlignment
import gzip
from Bio import SeqIO
from Bio.Seq import Seq
import parasail
sequence_dict = {}

# Reading the reads:
# with gzip.open('SRR10971381_1.fastq.gz', 'rt') as fastq:
#     for i, record in enumerate(SeqIO.parse(fastq, 'fastq'), start=1):
#         sequence_dict[i] = (str(record.seq))
# print(sequence_dict)

# Reading Influenza A:
# seq = IOAlignment('Grippe_A.fna')
# seq = seq.ParseFasta()  # long ID => keep the whole label?
# print('séq = ', seq)
print('----------------')

# [ i for i in range(len(genome))]

# from multiprocessing.pool import ThreadPool
# from time import sleep
# from random import random
# array=[("AACG",1),("AGGA",2),("TGCA",3),("GGAA",4),("ATTT",5),("CCAC",6),("GCAC",7),("TAGG",8)]
# def process(data1,data2):
#     print(data1,data2)
#     sleep(1)
# pool = ThreadPool(processes=2)
# for data in array:
#     pool.apply_async(process,data)
# pool.close()
# pool.join()

# Notes on the parasail calls below:
# - sg_dx_scan_sat performs a semi-global alignment. According to these notes,
#   "dx" means gaps at the beginning of the query sequence and at the end of
#   the database sequence are not penalized.
#   NOTE(review): parasail's naming scheme suggests "dx" refers to both ends
#   of the database sequence -- confirm against the parasail documentation.
# - "sat" indicates that both the 8-bit and the 16-bit solutions are
#   attempted, the appropriate one being chosen based on the size of the data.
# - 5 and 1 are the gap opening / gap extension penalties.
# - parasail.dnafull is a predefined substitution matrix for DNA sequences
#   that covers all possible nucleotide substitutions.
first_sequence = "TGTTGCATCTTCAGCTAGTCTGGAGCAA"
second_sequence = "GGACAGGGCTTTGAGTGGATGTGATGGATCATCACCTACACTGGGAACCCAACGTATACC"

# Score-only alignment.
al = parasail.sg_dx_scan_sat(first_sequence, second_sequence, 5, 1, parasail.dnafull)
print(al.score)

# Same alignment with the traceback kept, to display both aligned strings.
al = parasail.sg_dx_trace_scan_sat(first_sequence, second_sequence, 5, 1, parasail.dnafull)
traceback = al.get_traceback()
print(traceback.ref + "||||||||" + traceback.query)
import tqdm
import time
import tqdm
# Demo 1: wrap an iterable so that tqdm displays a progress bar while looping.
j = 0
for i in tqdm.tqdm(range(int(10e8))):
    j = j + i

# Demo 2: drive a progress bar by hand.
# Only the ``tqdm`` module is imported here, so the progress-bar class has to
# be referenced as ``tqdm.tqdm``; calling the module itself raises TypeError.
# The ``with`` block closes the bar even if the loop is interrupted.
with tqdm.tqdm(total=100) as progress:
    for i in range(100):
        time.sleep(.1)  # only slows the program down so tqdm's effect is visible
        progress.update(1)
def compare_my_tuples(tuple):
    """Sort key: return the item at index 1 of ``tuple``."""
    second_field = tuple[1]
    return second_field


# Example: rank (number, letter) pairs by their letter.
l = [
    (2.5, "A"),
    (4.1, "D"),
    (1.1, "C"),
    (3.7, "F"),
    (2.8, "B"),
]
# sorted() returns a new list; the result is not stored here, so ``l`` keeps
# its original order.
sorted(l, key=compare_my_tuples)
parasail.png

215 KiB

File added
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
import parasail

# The two sequences handed to parasail (first and second argument).
first_sequence = "TGTTGCATCTTCAGCTAGTCTGGAGCAA"
second_sequence = "GGACAGGGCTTTGAGTGGATGTGATGGATCATCACCTACACTGGGAACCCAACGTATACC"

# Score-only semi-global alignment (gap open 5, gap extend 1, DNA matrix).
al = parasail.sg_dx_scan_sat(first_sequence, second_sequence, 5, 1, parasail.dnafull)
print(al.score)

# Same alignment with the traceback kept, to display both aligned strings.
al = parasail.sg_dx_trace_scan_sat(first_sequence, second_sequence, 5, 1, parasail.dnafull)
traceback = al.get_traceback()
print(traceback.ref + "" + traceback.query)
```
%% Output
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[1], line 1
----> 1 import parasail
3 al = parasail.sg_dx_scan_sat("TGTTGCATCTTCAGCTAGTCTGGAGCAA", "GGACAGGGCTTTGAGTGGATGTGATGGATCATCACCTACACTGGGAACCCAACGTATACC", 5, 1, parasail.dnafull)
4 print(al.score)
ModuleNotFoundError: No module named 'parasail'
%% Cell type:code id: tags:
``` python
genome_grippe[172]
```
%% Output
'C'
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment