Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
MABS-2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Thomas Wacquet
MABS-2
Commits
922db3a6
Commit
922db3a6
authored
1 year ago
by
Thomas Wacquet
Browse files
Options
Downloads
Patches
Plain Diff
some changes in classes
parent
5ab332fc
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
main.py
+128
-63
128 additions, 63 deletions
main.py
with
128 additions
and
63 deletions
main.py
+
128
−
63
View file @
922db3a6
...
...
@@ -17,7 +17,7 @@ label = "NC_007357.1 Influenza A virus (A/Goose/Guangdong/1/96(H5N1)) polymerase
# Lectures des reads :
reads
=
{}
with
gzip
.
open
(
'
SRR10971381_1.fastq
.gz
'
,
'
rt
'
)
as
fastq
:
with
gzip
.
open
(
'
data/covid
.gz
'
,
'
rt
'
)
as
fastq
:
counter
=
0
for
i
,
record
in
enumerate
(
SeqIO
.
parse
(
fastq
,
'
fastq
'
),
start
=
1
):
reads
[
i
]
=
(
str
(
record
.
seq
))
...
...
@@ -35,34 +35,36 @@ genes_grippe = genes_grippe.ParseFasta() # ID long => garder l'entier en entier
#genome="BGAABCGHJ"
# Genome
## - genome
## + create_suffix_table()
## + search_suffix()
## + get_suffix()
# Reads
## +
## + split_k_mers(k)
##
# découpe le genome en n kmers de taille k
def split_kmers(genome, k):
    """Split *genome* into consecutive, non-overlapping k-mers.

    The sequence is cut every ``k`` characters starting at index 0. A
    trailing fragment shorter than ``k`` is discarded, so every returned
    element has exactly ``k`` characters.

    Args:
        genome: The sequence to cut (any sliceable sequence, e.g. ``str``).
        k: Length of each k-mer; must be a positive integer.

    Returns:
        A list of the k-mers in order of appearance. The list is empty when
        ``genome`` is shorter than ``k`` (including when it is empty).

    Raises:
        ValueError: If ``k`` is not strictly positive.
    """
    if k <= 0:
        raise ValueError("k must be a positive integer")
    # Stop at the last index where a full k-mer still fits; this drops the
    # short trailing fragment without indexing into a possibly empty list.
    usable_length = len(genome) - len(genome) % k
    return [genome[start:start + k] for start in range(0, usable_length, k)]
class
Genome
:
def
__init__
(
self
,
seq
):
self
.
__seq
=
seq
self
.
__suffix_table
=
self
.
__create_suffix_table
(
self
)
# crée la table des indices des suffixes triés
def
create_suffix_table
(
genome
):
def
__
create_suffix_table
(
self
):
# génère indices de 0 à la taille du génome
indices
=
[
i
for
i
in
range
(
len
(
genome
))]
return
sorted
(
indices
,
key
=
get_suffix
)
indices
=
[
i
for
i
in
range
(
len
(
self
.
__seq
))]
return
sorted
(
indices
,
key
=
self
.
__
get_suffix
)
# obtenir le suffixe à la position i
def
get_suffix
(
i
):
return
gene_grippe
[
i
:]
# obtenir le suffixe à la position i
- pour trier correctement
def
__
get_suffix
(
self
,
i
):
return
self
.
__seq
[
i
:]
def
search_suffix
(
genome
,
suffix_table
,
kmer
,
i1
,
i2
):
def
search_indice
(
self
,
seq_to_search
):
return
self
.
__search_suffix
(
seq_to_search
,
0
,
len
(
self
.
__seq
))
def
__search_suffix
(
self
,
seq_to_search
,
i1
,
i2
):
# kmer non trouvé
if
i2
-
i1
<
1
:
return
-
1
...
...
@@ -70,21 +72,84 @@ def search_suffix(genome, suffix_table, kmer, i1, i2):
m
=
(
i1
+
i2
)
//
2
# si le kmer est trouvé au début du suffixe, renvoyer la position
if
genome
[
suffix_table
[
m
]:].
startswith
(
kmer
):
return
suffix_table
[
m
]
if
self
.
__seq
[
self
.
__
suffix_table
[
m
]:].
startswith
(
seq_to_search
):
return
self
.
__
suffix_table
[
m
]
# si le kmer est plus grand que le suffixe du milieu, parcourir la droite du tableau des suffixes
if
kmer
>
genome
[
suffix_table
[
m
]:]:
return
search_suffix
(
genome
,
suffix_table
,
kmer
,
m
+
1
,
i2
)
if
seq_to_search
>
self
.
__seq
[
self
.
__
suffix_table
[
m
]:]:
return
self
.
__
search_suffix
(
self
.
__seq
,
self
.
__suffix_table
,
seq_to_search
,
m
+
1
,
i2
)
# s'il est plus petit, parcourir la gauche
else
:
return
search_suffix
(
genome
,
suffix_table
,
kmer
,
i1
,
m
)
return
self
.
__search_suffix
(
self
.
__seq
,
self
.
__suffix_table
,
seq_to_search
,
i1
,
m
)
# découpe le genome en n kmers de taille k
# def split_kmers(genome, k):
# i = 0
# res = []
# # chaque k lettres, découper un nouveau mot et le placer dans la liste des kmers
# for i in range(0, len(genome), k):
# res.append(genome[i:i+k])
# # si le kmer de fin est trop petit le retirer
# if len(res[len(res)-1]) < k:
# res.pop()
# return res
# crée la table des indices des suffixes triés
# def create_suffix_table(genome):
# génère indices de 0 à la taille du génome
# indices = [i for i in range(len(genome))]
# return sorted(indices, key=get_suffix)
# obtenir le suffixe à la position i
# def get_suffix(i):
# return gene_grippe[i:]
# def search_suffix(genome, kmer, i1, i2):
# # kmer non trouvé
# if i2 - i1 < 1:
# return -1
# m = (i1 + i2) // 2
# # si le kmer est trouvé au début du suffixe, renvoyer la position
# if genome[suffix_table[m]:].startswith(kmer):
# return suffix_table[m]
# # si le kmer est plus grand que le suffixe du milieu, parcourir la droite du tableau des suffixes
# if kmer > genome[suffix_table[m]:]:
# return search_suffix(genome, suffix_table, kmer, m+1, i2)
# # s'il est plus petit, parcourir la gauche
# else:
# return search_suffix(genome, suffix_table, kmer, i1, m)
class Read:
    """A sequencing read that can be cut into fixed-size k-mers."""

    def __init__(self, seq):
        """Store the read's sequence.

        Args:
            seq: The read sequence (any sliceable sequence, e.g. ``str``).
        """
        self.__seq = seq

    def split_into_kmers(self, k):
        """Split the read into consecutive, non-overlapping k-mers.

        The sequence is cut every ``k`` characters starting at index 0. A
        trailing fragment shorter than ``k`` is discarded.

        Args:
            k: Length of each k-mer; must be a positive integer.

        Returns:
            A list of the k-mers in order of appearance. The list is empty
            when the read is shorter than ``k`` (including when it is empty).

        Raises:
            ValueError: If ``k`` is not strictly positive.
        """
        if k <= 0:
            raise ValueError("k must be a positive integer")
        # Stop at the last index where a full k-mer still fits; this drops
        # the short trailing fragment without indexing into an empty list.
        usable_length = len(self.__seq) - len(self.__seq) % k
        return [
            self.__seq[start:start + k]
            for start in range(0, usable_length, k)
        ]

    # The driver loop in this file calls ``split_kmers`` on Read instances;
    # keep that name working as an alias of ``split_into_kmers``.
    split_kmers = split_into_kmers
#print(split_kmers(reads[1], 10))
for
read
in
reads
.
values
():
kmers
=
split_kmers
(
read
,
11
)
r
=
Read
(
read
)
kmers
=
r
.
split_kmers
(
11
)
for
gene_grippe
in
genes_grippe
.
values
():
suffix_table
=
create_suffix_table
(
gene_grippe
)
_
suffix_table
=
create_suffix_table
(
gene_grippe
)
for
kmer
in
kmers
:
pos
=
search_suffix
(
gene_grippe
,
suffix_table
,
kmer
,
0
,
len
(
suffix_table
))
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment