Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
petitebete
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Pacome Riobe
petitebete
Commits
3ad80f72
Commit
3ad80f72
authored
1 month ago
by
Pacome Riobe
Browse files
Options
Downloads
Patches
Plain Diff
Add new file : sequence
parent
85d7fffc
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
sequence
+114
-0
114 additions, 0 deletions
sequence
with
114 additions
and
0 deletions
sequence
0 → 100644
+
114
−
0
View file @
3ad80f72
import parasail
class Sequence:
"""
represente une sequence ADN
"""
def __init__(self, sequence: str):
"""
constructeur d'un objet Sequence
:param sequence: str, une sequence ADN
"""
self.sequence = sequence
self.taille_kmer = 0
def __len__(self) -> int:
"""
renvoie la taille de l'objet Sequence
:return: int
"""
return len(self.sequence)
def suffix_array(self) -> list:
"""
construit la table des suffixes
:return: list, la table des suffixes
"""
return sorted(range(len(self.sequence)), key=lambda i: self.sequence[i:])
def kmers(self, k) -> list:
"""
renvoie la liste des kmers ; les kmers chevauchants sont filtrés
:param k: int, la taille des kmers
:return: list, les kmers
"""
self.taille_kmer = k
kmers = [] # dict
for i in range(0, (len(self.sequence) - k), int(k/2)):
kmers.append(i)
return kmers
def get_kmer(self, kmer):
"""
renvoie le kmer correspondant à sa position entree en parametre
:param kmer: int, position du kmer
:return:
"""
sequence = self.sequence
return sequence[kmer:self.taille_kmer]
def get_sequence(self) -> str:
"""
renvoie la sequencede l'objet Sequence
:return: str, la sequence
"""
return self.sequence
def search_kmer(kmer: list, genome: str) -> list:
"""
recherche les hits entre la liste de kmers et le genome
:param kmer: list, liste de kmers
:param genome: str, une sequence adn
:return: list, la liste des positions des hits
"""
liste_positions = []
n = 0
table_suffixes = genome.suffix_array()
valeur_suffixe = int(len(table_suffixes) / 2)
while kmer != genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
if kmer < genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
valeur_suffixe = int(valeur_suffixe/2)
if kmer > genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
valeur_suffixe += int((len(table_suffixes) - valeur_suffixe)/2)
if kmer == genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
val_egale = valeur_suffixe
liste_positions.append(valeur_suffixe)
if val_egale > 0 and kmer == genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
valeur_suffixe = valeur_suffixe - 1
if kmer == genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
liste_positions.append(valeur_suffixe)
if val_egale < len(genome):
val_egale = val_egale + 1
if kmer == genome.sequence[val_egale:val_egale+len(kmer)]:
liste_positions.append(val_egale)
return liste_positions
def align_sequences(query: str, genome: str):
"""
realise l'alignement
:param query: str,
:param genome: str,
:return: le score, l'alignement
"""
# alignement semi-global
align_sg = parasail.sg_dx_trace_scan_sat(query, genome, 5, 1, parasail.dnafull)
score = align_sg.score
traceback = align_sg.get_traceback()
# seq align (query = read, ref = génome)
align_seq_query = traceback.query
align_seq_genome = traceback.ref
return score, align_seq_query, align_seq_genome
if __name__ == "__main__":
query = "ATGACCA"
genome = "ATTCATGACCAGGTGCTGTCCAGGCTGAATGACCAGGTCGGATTATAGGCTATGACCAGGATGC"
genome_objet = Sequence(genome)
query_objet = Sequence(query)
suffixe_array = genome_objet.suffix_array()
for value in suffixe_array:
print(genome[value:])
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment