Skip to content
Snippets Groups Projects
Commit 777aabb2 authored by Pacome Riobe's avatar Pacome Riobe
Browse files

Upload New File : alignment

parent 3ad80f72
No related branches found
No related tags found
No related merge requests found
from Sequence import *
from reads import *
from recherche import *
class Alignement:
def __init__(self, query, genome, k):
"""
:param query: obj Sequence
:param genome: obj Sequence
"""
self.query = query.get_sequence()
self.genome = genome.get_sequence()
self.kmers = query.kmers(k)
self.sufftable = genome.suffix_array()
self.taille_kmer = k
def get_taille_kmer(self):
"""
:return:
"""
return self.taille_kmer
def search_seed(self):
"""
cherche aussi les hits
:param kmer_list:
:param genome:
:return:
"""
k = self.get_taille_kmer()
liste = []
genome = self.genome
query = self.query
suff = self.sufftable
for kmer in self.kmers:
print(kmer)
debut, fin = 0, len(suff) - 1
m = None
while debut <= fin and m is None:
milieu = int((debut + fin) // 2)
if genome[suff[milieu]: suff[milieu] + k] == query[kmer:kmer + k]:
if milieu not in liste:
liste.append([kmer, suff[milieu]])
m = "trouve"
elif genome[suff[milieu]: suff[milieu] + k] < query[kmer:kmer + k]:
debut = milieu + 1
else:
fin = milieu - 1
return liste
def align_sequences(self):
"""
:param query:
:param genome:
:return:
liste = []
seeds = self.search_seed()
# alignement semi-global
for seed in seeds:
genome = self.genome[seed:self.taille_kmer + seed]
align_sg = parasail.sg_dx_trace_scan_sat(query, genome, 5, 1, parasail.dnafull)
score = align_sg.score
traceback = align_sg.get_traceback()
# seq align (query = read, ref = génome)
align_seq_query = traceback.query
align_seq_genome = traceback.ref
liste.append(score, align_seq_query, align_seq_genome)
"""
seeds = self.search_seed()
print(seeds)
best_score = float("-inf")
best_alignment = ("", "")
k = self.taille_kmer
for kmer, pos in seeds:
maxi = min(kmer + (len(self.genome) - pos), len(self.query))
mini = max(0, kmer - pos)
genome_segment = self.genome[0:len(self.genome)]
query_segment = self.query[mini: maxi]
align_result = parasail.sg_dx_trace_scan_sat(query_segment, genome_segment, 5, 1, parasail.dnafull)
if align_result.score > best_score:
best_score = align_result.score
traceback = align_result.get_traceback()
best_alignment = (traceback.query, traceback.ref)
if best_alignment == ("", ""):
return None
return best_score, best_alignment[0], best_alignment[1]
if __name__ == "__main__":
kmer = 3
query = "ATGGAT"
query_objet = Sequence(query)
genome = "ATGATGATGATG"
genome_objet = Sequence(genome)
suff = genome_objet.suffix_array()
kmers = query_objet.kmers(kmer)
print(kmers)
query_objet = Sequence(query)
alignement = Alignement(query_objet, genome_objet, kmer)
seeds = alignement.search_seed()
print(seeds)
for value in seeds:
print(alignement.align_sequences())
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment