Skip to content
Snippets Groups Projects
Commit cbb54d09 authored by Pacome Riobe
Browse files

Edit reads.py

parent bf950cd9
No related branches found
No related tags found
No related merge requests found
...@@ -11,6 +11,7 @@ class Read_file: ...@@ -11,6 +11,7 @@ class Read_file:
constructeur d'un objet Read constructeur d'un objet Read
:param file: str, le chemin vers un fichier compressé fastq ou fasta :param file: str, le chemin vers un fichier compressé fastq ou fasta
""" """
self.nb_bases = None
self.it = None self.it = None
self.file = file self.file = file
self.extension = "rt" self.extension = "rt"
...@@ -19,6 +20,7 @@ class Read_file: ...@@ -19,6 +20,7 @@ class Read_file:
if '.fna.gz' in self.file or '.fasta' in self.file: if '.fna.gz' in self.file or '.fasta' in self.file:
self.method = "fasta" self.method = "fasta"
def __len__(self) -> int: def __len__(self) -> int:
""" """
permet de compter le nombre de reads dans un objet Read permet de compter le nombre de reads dans un objet Read
...@@ -28,8 +30,34 @@ class Read_file: ...@@ -28,8 +30,34 @@ class Read_file:
with gzip.open(self.file, self.extension) as fichier: with gzip.open(self.file, self.extension) as fichier:
for _ in SeqIO.parse(fichier, self.method): for _ in SeqIO.parse(fichier, self.method):
count += 1 count += 1
self.len = count
return count return count
def nombre_bases(self):
    """
    Count the total number of bases readable through ``lecture``.

    ``lecture(pos)`` returns a base for every position from 0 up to the
    total number of bases (exclusive) and None from there on, so the total
    is the smallest position for which it returns None. Each ``lecture``
    call re-reads the compressed file, so instead of probing every position
    one by one the boundary is located by doubling and then bisecting,
    which needs only a logarithmic number of calls.

    The result is also stored in ``self.nb_bases``.

    :return: int, the total number of bases
    """
    if self.lecture(0) is None:
        total = 0
    else:
        # Invariant: lecture(low) is a base. Double `high` until it falls
        # past the end of the data.
        low, high = 0, 1
        while self.lecture(high) is not None:
            low, high = high, high * 2
        # Now lecture(low) is a base and lecture(high) is None: bisect to
        # find the first position without a base.
        while high - low > 1:
            mid = (low + high) // 2
            if self.lecture(mid) is None:
                high = mid
            else:
                low = mid
        total = high
    self.nb_bases = total
    return total
def sous_sequence(self, position, taille):
    """
    Return the sub-sequence starting at the given position, of the given size.
    Used to retrieve the kmers and the seeds.

    Bases are fetched one at a time through ``lecture``. If ``lecture``
    returns None (no base at that position, e.g. past the end of the data),
    collection stops there and the shorter sub-sequence is returned.

    :param position: int, start position
    :param taille: int, size of the sub-sequence
    :return: str, the sub-sequence (at most ``taille`` characters)
    """
    bases = []
    # `taille` is a length, not an end index: cover position .. position + taille - 1.
    for n in range(position, position + taille):
        base = self.lecture(n)
        if base is None:
            break
        bases.append(base)
    return "".join(bases)
def next(self): def next(self):
""" """
appelle l'itérateur appelle l'itérateur
...@@ -56,6 +84,19 @@ class Read_file: ...@@ -56,6 +84,19 @@ class Read_file:
seq = self.next() seq = self.next()
kmer = seq.kmers() kmer = seq.kmers()
def kmers(self, k: int) -> list:
    """
    Return the start positions of the kmers, with limited redundancy.

    Positions are spaced ``k // 2`` apart (at least 1), so consecutive kmers
    overlap by about half their length. The last candidate start is
    ``nombre_bases() - k``, the last position from which a full kmer fits.

    :param k: int, size of the kmers (must be >= 1)
    :return: list, the start positions of the kmers in the sequence
    :raises ValueError: if ``k`` is smaller than 1
    """
    if k < 1:
        raise ValueError("k must be a positive integer")
    # k // 2 is 0 for k == 1, and range() rejects a zero step.
    step = max(1, k // 2)
    # + 1 so that the last valid start position (nb_bases - k) is included.
    return list(range(0, self.nombre_bases() - k + 1, step))
def concat_fasta(self, output): def concat_fasta(self, output):
""" """
...@@ -66,18 +107,43 @@ class Read_file: ...@@ -66,18 +107,43 @@ class Read_file:
with gzip.open(self.file, self.extension) as fichier: with gzip.open(self.file, self.extension) as fichier:
record = SeqIO.parse(fichier, self.method) record = SeqIO.parse(fichier, self.method)
for n in range(0, self.__len__()): for n in range(0, self.__len__()):
out_fasta.write(str(record.__next__().seq)) out_fasta.write(str(record.__next__().seq) + "\n")
print(f"fin, resultat dans {output}") print(f"fin, resultat dans {output}")
def lecture(self, pos):
    """
    Read the series of reads in the file as if it were one single sequence.

    The file is opened with ``gzip.open(self.file, self.extension)`` and
    scanned from the start on every call. Which lines count as sequence
    depends on ``self.method``:

    * ``"fasta"``: every non-empty line that does not start with ``>``;
    * anything else (FASTQ layout): the second line of each 4-line record.

    :param pos: int, 0-based position in the concatenated sequence
    :return: str, the base at the given position, or None if there is no
        base at that position
    """
    # Fall back to the FASTQ layout when no method is set on the instance.
    is_fasta = getattr(self, "method", "fastq") == "fasta"
    total_bases = 0  # Number of bases in the sequence lines already passed
    with gzip.open(self.file, self.extension) as f:
        for i, line in enumerate(f):
            line = line.strip()
            if is_fasta:
                # Skip blank lines and '>' headers (str or bytes mode).
                if not line or line[:1] in (">", b">"):
                    continue
            elif i % 4 != 1:
                # FASTQ: only the second line of each record is sequence.
                continue
            seq_length = len(line)
            if total_bases <= pos < total_bases + seq_length:
                return line[pos - total_bases]  # Found
            total_bases += seq_length
    return None
if __name__ == "__main__": if __name__ == "__main__":
G = "/home/m1miso/pacome.riobe.etu/PycharmProjects/pythonProject_petitebete/petitgenome.fna.gz" G = "C:/Users/pacom/PycharmProjects/PythonProject_petitebete/genome.fastq.gz"
Q = "/home/m1miso/pacome.riobe.etu/PycharmProjects/pythonProject_petitebete/petitquery.fastq.gz" Q = "C:/Users/pacom/PycharmProjects/PythonProject_petitebete/query.fna.gz"
genome = Read_file(G) genome = Read_file(G)
query = Read_file(Q) query = Read_file(Q)
print("___") print("___")
print("len") print("len")
print("___") print("___")
print(query.__len__()) k = 11
kmers = query.kmers(k)
print(kmers)
#for n in kmers:
# print(query.sous_sequence(n, k))
print(query.sous_sequence(0,30))
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment