Skip to content
Snippets Groups Projects
Commit 3ad80f72 authored by Pacome Riobe's avatar Pacome Riobe
Browse files

Add new file : sequence

parent 85d7fffc
No related branches found
No related tags found
No related merge requests found
sequence 0 → 100644
import parasail
class Sequence:
"""
represente une sequence ADN
"""
def __init__(self, sequence: str):
"""
constructeur d'un objet Sequence
:param sequence: str, une sequence ADN
"""
self.sequence = sequence
self.taille_kmer = 0
def __len__(self) -> int:
"""
renvoie la taille de l'objet Sequence
:return: int
"""
return len(self.sequence)
def suffix_array(self) -> list:
"""
construit la table des suffixes
:return: list, la table des suffixes
"""
return sorted(range(len(self.sequence)), key=lambda i: self.sequence[i:])
def kmers(self, k) -> list:
"""
renvoie la liste des kmers ; les kmers chevauchants sont filtrés
:param k: int, la taille des kmers
:return: list, les kmers
"""
self.taille_kmer = k
kmers = [] # dict
for i in range(0, (len(self.sequence) - k), int(k/2)):
kmers.append(i)
return kmers
def get_kmer(self, kmer):
"""
renvoie le kmer correspondant à sa position entree en parametre
:param kmer: int, position du kmer
:return:
"""
sequence = self.sequence
return sequence[kmer:self.taille_kmer]
def get_sequence(self) -> str:
"""
renvoie la sequencede l'objet Sequence
:return: str, la sequence
"""
return self.sequence
def search_kmer(kmer: list, genome: str) -> list:
"""
recherche les hits entre la liste de kmers et le genome
:param kmer: list, liste de kmers
:param genome: str, une sequence adn
:return: list, la liste des positions des hits
"""
liste_positions = []
n = 0
table_suffixes = genome.suffix_array()
valeur_suffixe = int(len(table_suffixes) / 2)
while kmer != genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
if kmer < genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
valeur_suffixe = int(valeur_suffixe/2)
if kmer > genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
valeur_suffixe += int((len(table_suffixes) - valeur_suffixe)/2)
if kmer == genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
val_egale = valeur_suffixe
liste_positions.append(valeur_suffixe)
if val_egale > 0 and kmer == genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
valeur_suffixe = valeur_suffixe - 1
if kmer == genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
liste_positions.append(valeur_suffixe)
if val_egale < len(genome):
val_egale = val_egale + 1
if kmer == genome.sequence[val_egale:val_egale+len(kmer)]:
liste_positions.append(val_egale)
return liste_positions
def align_sequences(query: str, genome: str):
"""
realise l'alignement
:param query: str,
:param genome: str,
:return: le score, l'alignement
"""
# alignement semi-global
align_sg = parasail.sg_dx_trace_scan_sat(query, genome, 5, 1, parasail.dnafull)
score = align_sg.score
traceback = align_sg.get_traceback()
# seq align (query = read, ref = génome)
align_seq_query = traceback.query
align_seq_genome = traceback.ref
return score, align_seq_query, align_seq_genome
if __name__ == "__main__":
query = "ATGACCA"
genome = "ATTCATGACCAGGTGCTGTCCAGGCTGAATGACCAGGTCGGATTATAGGCTATGACCAGGATGC"
genome_objet = Sequence(genome)
query_objet = Sequence(query)
suffixe_array = genome_objet.suffix_array()
for value in suffixe_array:
print(genome[value:])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment