Skip to content
Snippets Groups Projects
Commit d46ada65 authored by Pacome Riobe's avatar Pacome Riobe
Browse files

Delete sequence

parent 777aabb2
No related branches found
No related tags found
No related merge requests found
import parasail
class Sequence:
"""
represente une sequence ADN
"""
def __init__(self, sequence: str):
"""
constructeur d'un objet Sequence
:param sequence: str, une sequence ADN
"""
self.sequence = sequence
self.taille_kmer = 0
def __len__(self) -> int:
"""
renvoie la taille de l'objet Sequence
:return: int
"""
return len(self.sequence)
def suffix_array(self) -> list:
"""
construit la table des suffixes
:return: list, la table des suffixes
"""
return sorted(range(len(self.sequence)), key=lambda i: self.sequence[i:])
def kmers(self, k) -> list:
"""
renvoie la liste des kmers ; les kmers chevauchants sont filtrés
:param k: int, la taille des kmers
:return: list, les kmers
"""
self.taille_kmer = k
kmers = [] # dict
for i in range(0, (len(self.sequence) - k), int(k/2)):
kmers.append(i)
return kmers
def get_kmer(self, kmer):
"""
renvoie le kmer correspondant à sa position entree en parametre
:param kmer: int, position du kmer
:return:
"""
sequence = self.sequence
return sequence[kmer:self.taille_kmer]
def get_sequence(self) -> str:
"""
renvoie la sequencede l'objet Sequence
:return: str, la sequence
"""
return self.sequence
def search_kmer(kmer: list, genome: str) -> list:
"""
recherche les hits entre la liste de kmers et le genome
:param kmer: list, liste de kmers
:param genome: str, une sequence adn
:return: list, la liste des positions des hits
"""
liste_positions = []
n = 0
table_suffixes = genome.suffix_array()
valeur_suffixe = int(len(table_suffixes) / 2)
while kmer != genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
if kmer < genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
valeur_suffixe = int(valeur_suffixe/2)
if kmer > genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
valeur_suffixe += int((len(table_suffixes) - valeur_suffixe)/2)
if kmer == genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
val_egale = valeur_suffixe
liste_positions.append(valeur_suffixe)
if val_egale > 0 and kmer == genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
valeur_suffixe = valeur_suffixe - 1
if kmer == genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
liste_positions.append(valeur_suffixe)
if val_egale < len(genome):
val_egale = val_egale + 1
if kmer == genome.sequence[val_egale:val_egale+len(kmer)]:
liste_positions.append(val_egale)
return liste_positions
def align_sequences(query: str, genome: str):
"""
realise l'alignement
:param query: str,
:param genome: str,
:return: le score, l'alignement
"""
# alignement semi-global
align_sg = parasail.sg_dx_trace_scan_sat(query, genome, 5, 1, parasail.dnafull)
score = align_sg.score
traceback = align_sg.get_traceback()
# seq align (query = read, ref = génome)
align_seq_query = traceback.query
align_seq_genome = traceback.ref
return score, align_seq_query, align_seq_genome
if __name__ == "__main__":
query = "ATGACCA"
genome = "ATTCATGACCAGGTGCTGTCCAGGCTGAATGACCAGGTCGGATTATAGGCTATGACCAGGATGC"
genome_objet = Sequence(genome)
query_objet = Sequence(query)
suffixe_array = genome_objet.suffix_array()
for value in suffixe_array:
print(genome[value:])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment