Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
petitebete
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Pacome Riobe
petitebete
Commits
cbb54d09
Commit
cbb54d09
authored
1 month ago
by
Pacome Riobe
Browse files
Options
Downloads
Patches
Plain Diff
Edit reads.py
parent
bf950cd9
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
reads.py
+70
-4
70 additions, 4 deletions
reads.py
with
70 additions
and
4 deletions
reads.py
+
70
−
4
View file @
cbb54d09
...
@@ -11,6 +11,7 @@ class Read_file:
...
@@ -11,6 +11,7 @@ class Read_file:
constructeur d
'
un objet Read
constructeur d
'
un objet Read
:param file: str, le chemin vers un fichier compressé fastq ou fasta
:param file: str, le chemin vers un fichier compressé fastq ou fasta
"""
"""
self
.
nb_bases
=
None
self
.
it
=
None
self
.
it
=
None
self
.
file
=
file
self
.
file
=
file
self
.
extension
=
"
rt
"
self
.
extension
=
"
rt
"
...
@@ -19,6 +20,7 @@ class Read_file:
...
@@ -19,6 +20,7 @@ class Read_file:
if
'
.fna.gz
'
in
self
.
file
or
'
.fasta
'
in
self
.
file
:
if
'
.fna.gz
'
in
self
.
file
or
'
.fasta
'
in
self
.
file
:
self
.
method
=
"
fasta
"
self
.
method
=
"
fasta
"
def
__len__
(
self
)
->
int
:
def
__len__
(
self
)
->
int
:
"""
"""
permet de compter le nombre de reads dans un objet Read
permet de compter le nombre de reads dans un objet Read
...
@@ -28,8 +30,34 @@ class Read_file:
...
@@ -28,8 +30,34 @@ class Read_file:
with
gzip
.
open
(
self
.
file
,
self
.
extension
)
as
fichier
:
with
gzip
.
open
(
self
.
file
,
self
.
extension
)
as
fichier
:
for
_
in
SeqIO
.
parse
(
fichier
,
self
.
method
):
for
_
in
SeqIO
.
parse
(
fichier
,
self
.
method
):
count
+=
1
count
+=
1
self
.
len
=
count
return
count
return
count
def nombre_bases(self):
    """
    Count the bases reachable through ``self.lecture``.

    Positions 0, 1, 2, ... are probed in order until ``self.lecture``
    returns ``None``; the number of successful probes is the base count.
    The result is also stored in ``self.nb_bases``.

    :return: int, the total number of bases
    """
    n = 0
    # Probe each position exactly once; the value itself is not needed,
    # only whether a base exists at that position.
    while self.lecture(n) is not None:
        n += 1
    self.nb_bases = n
    return n
def sous_sequence(self, position, taille):
    """
    Return the sub-sequence of length ``taille`` starting at ``position``.

    Used to retrieve k-mers and seeds. Bases are fetched one at a time
    through ``self.lecture``.

    :param position: int, start position
    :param taille: int, length of the sub-sequence
    :return: str, the sub-sequence; shorter than ``taille`` if
        ``self.lecture`` reports the end of the data (returns ``None``)
        before ``taille`` bases have been read
    """
    bases = []
    # ``taille`` is a length, not an end index: read [position, position + taille).
    for n in range(position, position + taille):
        base = self.lecture(n)
        if base is None:
            # Past the last base: stop instead of concatenating None.
            break
        bases.append(base)
    return "".join(bases)
def
next
(
self
):
def
next
(
self
):
"""
"""
appelle l
'
itérateur
appelle l
'
itérateur
...
@@ -56,6 +84,19 @@ class Read_file:
...
@@ -56,6 +84,19 @@ class Read_file:
seq
=
self
.
next
()
seq
=
self
.
next
()
kmer
=
seq
.
kmers
()
kmer
=
seq
.
kmers
()
def kmers(self, k: int):
    """
    Return the start positions of a half-overlapping set of k-mers.

    Start positions advance by ``k // 2`` (at least 1), so consecutive
    k-mers overlap by roughly half their length and the k-mer set stays
    small. The last valid start position, ``nombre_bases() - k``, is
    included when the step lands on it.

    :param k: int, k-mer size (must be >= 1)
    :return: list, the start positions of the k-mers in the sequence
    :raises ValueError: if ``k`` is smaller than 1
    """
    if k < 1:
        raise ValueError("k must be a positive integer")
    # k // 2 is 0 for k == 1, which range() rejects as a step.
    step = max(1, k // 2)
    # +1 so that a k-mer ending exactly on the last base is kept.
    last_start_exclusive = self.nombre_bases() - k + 1
    return list(range(0, last_start_exclusive, step))
def
concat_fasta
(
self
,
output
):
def
concat_fasta
(
self
,
output
):
"""
"""
...
@@ -66,18 +107,43 @@ class Read_file:
...
@@ -66,18 +107,43 @@ class Read_file:
with
gzip
.
open
(
self
.
file
,
self
.
extension
)
as
fichier
:
with
gzip
.
open
(
self
.
file
,
self
.
extension
)
as
fichier
:
record
=
SeqIO
.
parse
(
fichier
,
self
.
method
)
record
=
SeqIO
.
parse
(
fichier
,
self
.
method
)
for
n
in
range
(
0
,
self
.
__len__
()):
for
n
in
range
(
0
,
self
.
__len__
()):
out_fasta
.
write
(
str
(
record
.
__next__
().
seq
))
out_fasta
.
write
(
str
(
record
.
__next__
().
seq
)
+
"
\n
"
)
print
(
f
"
fin, resultat dans
{
output
}
"
)
print
(
f
"
fin, resultat dans
{
output
}
"
)
def lecture(self, pos):
    """
    Read the series of query reads as if it were one single sequence.

    The gzip file ``self.file`` is opened with mode ``self.extension`` and
    scanned from the start; every line whose index modulo 4 equals 1 is
    taken as a sequence line (FASTQ layout), and the stripped sequence
    lines are treated as laid end to end.

    :param pos: position of the wanted base in the concatenated sequence
    :return: str, the base at the given position, or None when ``pos``
        is not covered by any sequence line
    """
    offset = 0  # number of bases contained in the sequence lines already passed
    with gzip.open(self.file, self.extension) as handle:
        for line_no, raw in enumerate(handle):
            if line_no % 4 != 1:
                # Header, separator or quality line: carries no bases.
                continue
            bases = raw.strip()
            end = offset + len(bases)
            if offset <= pos < end:
                return bases[pos - offset]
            offset = end
    return None
if
__name__
==
"
__main__
"
:
if
__name__
==
"
__main__
"
:
G
=
"
/home/m1miso/pacome.riobe.etu
/PycharmProjects/
p
ythonProject_petitebete/
petit
genome.f
n
a.gz
"
G
=
"
C:/Users/pacom
/PycharmProjects/
P
ythonProject_petitebete/genome.fa
stq
.gz
"
Q
=
"
/home/m1miso/pacome.riobe.etu
/PycharmProjects/
p
ythonProject_petitebete/
petit
query.fa
stq
.gz
"
Q
=
"
C:/Users/pacom
/PycharmProjects/
P
ythonProject_petitebete/query.f
n
a.gz
"
genome
=
Read_file
(
G
)
genome
=
Read_file
(
G
)
query
=
Read_file
(
Q
)
query
=
Read_file
(
Q
)
print
(
"
___
"
)
print
(
"
___
"
)
print
(
"
len
"
)
print
(
"
len
"
)
print
(
"
___
"
)
print
(
"
___
"
)
print
(
query
.
__len__
())
k
=
11
kmers
=
query
.
kmers
(
k
)
print
(
kmers
)
#for n in kmers:
# print(query.sous_sequence(n, k))
print
(
query
.
sous_sequence
(
0
,
30
))
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment