Skip to content
Snippets Groups Projects
Select Git revision
  • master
1 result

README.md

Blame
  • Forked from an inaccessible project.
    • Thomas Fritsch's avatar
      276eff54
      maj readme · 276eff54
      Thomas Fritsch authored
      - fork
      - nouveau dom (maj skin css)
      - passage PageRenderer -> Router
      - suppression classe Page (inutile à ce stade avec le Router)
      - fusion avec readme cours-react/tp2
      276eff54
      History
      maj readme
      Thomas Fritsch authored
      - fork
      - nouveau dom (maj skin css)
      - passage PageRenderer -> Router
      - suppression classe Page (inutile à ce stade avec le Router)
      - fusion avec readme cours-react/tp2
    Sequence.py 4.49 KiB
    import parasail
    
    
    class Sequence:
        """
        represente une sequence ADN
        """
        def __init__(self, sequence: str):
            """
            constructeur d'un objet Sequence
            :param sequence: str, une sequence ADN
            """
            self.sequence = sequence
            self.taille_kmer = 0
    
        def __len__(self) -> int:
            """
            renvoie la taille de l'objet Sequence
            :return: int
            """
            return len(self.sequence)
    
        def suffix_array(self) -> list:
            """
            construit la table des suffixes
            :return: list, la table des suffixes
    
            exemple : 
            >>> genome = "ATTCATGACCAGGTGCTGTCCAGGCTGAATGACCAGGTCGGATTATAGGCTATGACCAGGATGC"
            >>> genome_obj = Sequence(genome)
            >>> genome_obj.suffix_array()
            [27, 54, 31, 7, 57, 46, 21, 34, 10, 44, 51, 28, 4, 60, 41, 0, 63, 56, 20, 33, 9, 3, 55, 19, 32, 8, 38, 49, 24, 15, 26, 53, 30, 6, 59, 40, 62, 48, 23, 14, 58, 39, 47, 22, 35, 11, 17, 36, 12, 45, 43, 50, 2, 18, 37, 25, 52, 29, 5, 61, 13, 16, 42, 1]
            """
            return sorted(range(len(self.sequence)), key=lambda i: self.sequence[i:])
    
        def kmers(self, k) -> list:
            """
            renvoie la liste des kmers ; les kmers chevauchants sont filtrés
            :param k: int, la taille des kmers
            :return: list, les kmers
    
            exemple :
            >>> query = "ATGACCAATTCATGACCAGGTGCTGTCCAGGCTGAATG"
            >>> query_obj = Sequence(query)
            >>> query_obj.kmers(11)
            [0, 5, 10, 15, 20, 25, 27]
            """
            self.taille_kmer = k
            kmers = []  # dict
            for i in range(0, (len(self.sequence) - k), int(k/2)):
                kmers.append(i)
            return kmers
    
        def get_kmer(self, kmer):
            """
            renvoie le kmer correspondant à sa position entree en parametre
            :param kmer: int, position du kmer
            :return:
            """
            sequence = self.sequence
            return sequence[kmer:self.taille_kmer]
    
        def get_sequence(self) -> str:
            """
            renvoie la sequencede l'objet Sequence
            :return: str, la sequence
    
            exemple : 
            >>> seq = "ATGACCAATTCATGACCAGGTGCTGTCCAGGCTGAATG"
            >>> seq_obj = Sequence(seq)
            >>> seq_obj.get_sequence()
            ATGACCAATTCATGACCAGGTGCTGTCCAGGCTGAATG
            """
            return self.sequence
    
    
    def search_kmer(kmer: list, genome: str) -> list:
        """
        recherche les hits entre la liste de kmers et le genome
        :param kmer: list, liste de kmers
        :param genome: str, une sequence adn
        :return: list, la liste des positions des hits
        """
        liste_positions = []
        n = 0
        table_suffixes = genome.suffix_array()
        valeur_suffixe = int(len(table_suffixes) / 2)
        while kmer != genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
            if kmer < genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
                valeur_suffixe = int(valeur_suffixe/2)
            if kmer > genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
                valeur_suffixe += int((len(table_suffixes) - valeur_suffixe)/2)
            if kmer == genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
                val_egale = valeur_suffixe
                liste_positions.append(valeur_suffixe)
                if val_egale > 0 and kmer == genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
                    valeur_suffixe = valeur_suffixe - 1
                    if kmer == genome.sequence[valeur_suffixe:valeur_suffixe+len(kmer)]:
                        liste_positions.append(valeur_suffixe)
                if val_egale < len(genome):
                    val_egale = val_egale + 1
                    if kmer == genome.sequence[val_egale:val_egale+len(kmer)]:
                        liste_positions.append(val_egale)
        return liste_positions
    
    
    def align_sequences(query: str, genome: str):
        """
        realise l'alignement
        :param query: str,
        :param genome: str,
        :return: le score, l'alignement
        """
        # alignement semi-global
        align_sg = parasail.sg_dx_trace_scan_sat(query, genome, 5, 1, parasail.dnafull)
        score = align_sg.score
        traceback = align_sg.get_traceback()
    
        # seq align (query = read, ref = génome)
        align_seq_query = traceback.query
        align_seq_genome = traceback.ref
        return score, align_seq_query, align_seq_genome
    
    
    if __name__ == "__main__":
        query = "ATGACCA"
        genome = "ATTCATGACCAGGTGCTGTCCAGGCTGAATGACCAGGTCGGATTATAGGCTATGACCAGGATGC"
        genome_objet = Sequence(genome)
        query_objet = Sequence(query)
        suffixe_array = genome_objet.suffix_array()
        for value in suffixe_array:
            print(genome[value:])