# Source code for yoda_powers.toolbox.toolbox

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

##################################################
# Modules
##################################################
# Python modules
from pathlib import PosixPath, Path
from datetime import datetime
import sys


##################################################
# Functions

def welcome_args(version_arg, parser_arg):
    """
    Decorator factory that surrounds a script entry point with a start/stop banner.

    Each call of the decorated function parses the command line with ``parser_arg``,
    prints a header (program name taken from the stem of ``parser.prog``, version,
    start time, command line and every parsed argument), runs the function, then
    prints the stop time, the elapsed run time and a closing banner.

    Args:
        version_arg (str): the program version, printed next to the program name
        parser_arg (:class:`argparse.ArgumentParser`): an already built parser
            (an instance, not the function that builds it)

    Returns:
        function: a decorator; the decorated function forwards its positional and
        keyword arguments to the original function and returns its result

    Notes:
        Use it as decorator of ``main()`` in scripts driven by
        :class:`argparse.ArgumentParser`. The values parsed by the wrapper are only
        displayed, they are not handed to the decorated function.

    Examples:
        >>> from yoda_powers.toolbox import welcome_args
        >>> @welcome_args(version, build_parser())
        ... def main():
        ...     pass  # some code
        >>> main()

        Typical output::

            ################################################################################
            #                             prog_name and version                            #
            ################################################################################
            Start time: 16-09-2020 at 14:39:02
            Commande line run: ./filter_mummer.py -l mummer/GUY0011.pp1.fasta.PH0014.pp1.fasta.mum

             - Intput Info:
                     - debug: False
                     - plot: False
            PROGRAMME CODE HERE
            Stop time: 16-09-2020 at 14:39:02       Run time: 0:00:00.139732
            ################################################################################
            #                               End of execution                               #
            ################################################################################

    """
    from functools import wraps

    def welcome(func):
        # wraps() keeps the name and docstring of the decorated function
        @wraps(func)
        def wrapper(*args, **kwargs):
            start_time = datetime.now()
            parser = parser_arg
            version = version_arg
            parse_args = parser.parse_args()
            # Welcome message
            print(
                    f"""{"#" * 80}\n#{Path(parser.prog).stem + " " + version:^78}#\n{"#" * 80}\nStart time: {start_time:%d-%m-%Y at %H:%M:%S}\nCommande line run: {" ".join(sys.argv)}\n""")
            # Summary of the parsed arguments for the user
            print(" - Intput Info:")
            for k, v in vars(parse_args).items():
                print(f"\t - {k}: {v}")
            print("\n")
            result = func(*args, **kwargs)
            # Read the clock once so stop time and run time describe the same instant
            stop_time = datetime.now()
            print(
                    f"""\nStop time: {stop_time:%d-%m-%Y at %H:%M:%S}\tRun time: {stop_time - start_time}\n{"#" * 80}\n#{'End of execution':^78}#\n{"#" * 80}""")
            return result
        return wrapper
    return welcome


def compare_list(list1, list2):
    """
    Compare two lists and return their common and exclusive elements.

    Both inputs are converted to :class:`set`, so duplicates are dropped and the
    elements must be hashable. Each result is sorted with ``key=sort_human``.

    Arguments:
        list1 (list): the first python :class:`list`
        list2 (list): the second python :class:`list`

    Returns:
        tuple: ``(common, u1, u2)``, three sorted lists

        - common: elements present in both lists
        - u1: elements only present in list1
        - u2: elements only present in list2

    Notes:
        Reminder of the set operators::

            ens1 = set([1, 2, 3, 4, 5, 6])
            ens2 = set([2, 3, 4])
            ens3 = set([6, 7, 8, 9])
            ens1 & ens2  # {2, 3, 4}: elements present in both sets
            ens1 | ens3  # {1, 2, 3, 4, 5, 6, 7, 8, 9}: union of both sets
            ens1 ^ ens3  # {1, 2, 3, 4, 5, 7, 8, 9}: union minus the common elements
            ens1 - ens2  # {1, 5, 6}: ens1 without the elements of ens2

    Examples:
        >>> l1 = ["s1", "s2", "s10", "s6"]
        >>> l2 = ["s6", "s7"]
        >>> com, u1, u2 = compare_list(l1, l2)
        >>> print(com)
        ['s6']
        >>> print(u1)
        ['s1', 's2', 's10']
        >>> print(u2)
        ['s7']

    """
    ens1 = set(list1)
    ens2 = set(list2)
    # sorted() accepts any iterable, no intermediate list is needed
    common = sorted(ens1 & ens2, key=sort_human)
    uniq1 = sorted(ens1 - ens2, key=sort_human)
    uniq2 = sorted(ens2 - ens1, key=sort_human)
    return common, uniq1, uniq2


def existant_file(path):
    """
    'Type' for argparse - check that ``path`` is an existing regular file and
    return its absolute path built with pathlib.

    Notes:
        function need modules:

        - pathlib
        - argparse


    Arguments:
        path (str): a path to an existing file

    Returns:
        :class:`pathlib.Path`: ``Path(path).resolve()``

    Raises:
         ArgumentTypeError: If file `path` does not exist.
         ArgumentTypeError: If `path` exists but is not a regular file.

    Examples:
        >>> import argparse
        >>> parser = argparse.ArgumentParser(prog='test.py', description='''This is demo''')
        >>> parser.add_argument('-f', '--file', metavar="<path/to/file>", type=existant_file, required=True,
        ...                     dest='path_file', help='path to file')

    """
    from argparse import ArgumentTypeError

    candidate = Path(path)
    if not candidate.exists():
        # Argparse uses the ArgumentTypeError to give a rejection message like:
        # error: argument input: x does not exist
        raise ArgumentTypeError(f'ERROR: file "{path}" does not exist')
    if not candidate.is_file():
        raise ArgumentTypeError(f'ERROR: "{path}" is not a valid file')

    return candidate.resolve()


def max_key_dict(dico):
    """
    Return the key associated with the greatest value of ``dico``.

    Arguments:
        dico (:obj:`dict`): a python :class:`dict` whose values can be compared

    Returns:
        the key holding the maximum value; when several keys share that value,
        the first one in iteration order is returned

    Raises:
        ValueError: If ``dico`` is empty.

    Example:
        >>> dico = {"A":0.5, "C":0.7, "T":0.01, "G":0.9}
        >>> key_max = max_key_dict(dico)
        >>> print(key_max)
        G
    """
    # Iterating a dict yields its keys; dico.get ranks each key by its value
    return max(dico, key=dico.get)


def sort_human(in_list, _nsre=None):
    """
    Key function to sort a :class:`list` mixing letters and digits the way humans
    expect; use ``list.sort(key=sort_human)`` or ``sorted(list, key=sort_human)``.

    The element is split on runs of digits; digit runs become :class:`int` and the
    other chunks are lower-cased, so ``"something2"`` sorts before ``"something17"``.

    Arguments:
        in_list (str): ONE element of the list being sorted (the function is
            called by ``sort``/``sorted`` for each element)
        _nsre (:obj:`re.Pattern`, optional): expression used to split the element,
            defaults to ``re.compile('([0-9]+)')``

    Returns:
        list: the chunks of the element (``int`` for digit runs, lower-cased ``str``
        otherwise). An element that cannot be split (not a string) is returned
        unchanged; a ``SyntaxWarning`` is emitted unless it is an ``int``.

    Example:
        >>> list_to_sorted = ["something1","something32","something17","something2","something29","something24"]
        >>> print(sorted(list_to_sorted, key=sort_human))
        ['something1', 'something2', 'something17', 'something24', 'something29', 'something32']
        >>> list_to_sorted.sort(key=sort_human)
        >>> print(list_to_sorted)
        ['something1', 'something2', 'something17', 'something24', 'something29', 'something32']

    """
    from warnings import warn
    import re
    if not _nsre:
        _nsre = re.compile('([0-9]+)')
    try:
        # isdecimal() only accepts characters int() can convert, whereas isdigit()
        # is also true for characters such as superscripts that make int() fail
        return [int(text) if text.isdecimal() else text.lower() for text in re.split(_nsre, in_list)]
    except TypeError:
        if not isinstance(in_list, int):
            warn(
                    f"Yoda_powers::sort_human : element '{in_list}' on the list not understand so don't sort this element\n",
                    SyntaxWarning, stacklevel=2)
        # Always hand the element back as its own key: returning None for integers
        # made sorting a list of several integers fail
        return in_list


def readable_dir(prospective_dir):
    """
    'Type' for argparse - check that the directory exists and is readable, then
    return its absolute path built with pathlib.

    Notes:
        function need modules:

        - pathlib
        - argparse
        - os


    Arguments:
        prospective_dir (str): a path to an existing directory

    Returns:
        :class:`pathlib.Path`: ``Path(prospective_dir).resolve()``

    Raises:
         ArgumentTypeError: If directory `prospective_dir` does not exist.
         ArgumentTypeError: If `prospective_dir` is not a directory.
         ArgumentTypeError: If `prospective_dir` is not readable (``os.R_OK``).

    Examples:
        >>> import argparse
        >>> parser = argparse.ArgumentParser(prog='test.py', description='''This is demo''')
        >>> parser.add_argument('-d', '--dir', metavar="<path/to/dir>", type=readable_dir, required=True,
        ...                     dest='path_dir', help='path to directory')
    """
    from argparse import ArgumentTypeError
    import os

    candidate = Path(prospective_dir)
    if not candidate.exists():
        # Argparse uses the ArgumentTypeError to give a rejection message like:
        # error: argument input: x does not exist
        raise ArgumentTypeError(f'ERROR: directory "{prospective_dir}" does not exist')
    if not candidate.is_dir():
        raise ArgumentTypeError(f'ERROR: "{prospective_dir}" is not a valid directory')
    if not os.access(prospective_dir, os.R_OK):
        raise ArgumentTypeError(f'ERROR: "{prospective_dir}" is not a readable dir')

    return candidate.resolve()


def replace_all(repls, str):
    """
    Replace, in one pass, every occurrence of the keys of ``repls`` found in the
    text by the associated value.

    All keys are searched simultaneously, so a replacement is never replaced
    again (swapping two words works, see the example).

    Arguments:
        repls (dict): mapping ``{text to find: replacement text}``
        str (str): the text in which words are replaced

    Returns:
        str: the text with each key of ``repls`` replaced by its value; the text is
        returned unchanged when ``repls`` is empty

    Example:
        >>> text =  "i like apples, but pears scare me"
        >>> print(replace_all({"apple": "pear", "pear": "apple"}, text))
        i like pears, but apples scare me
    """
    import re
    if not repls:
        # An empty mapping would build an empty pattern that matches everywhere
        # and then fail looking up the key ''
        return str
    pattern = '|'.join(re.escape(key) for key in repls)
    return re.sub(pattern, lambda match: repls[match.group(0)], str)


def load_in_list(filename):
    """
    Check that the file exists and return a generator over its rows, stripped of
    their trailing whitespace (line break included).
    The file can be a gzip file with '.gz' extension.

    Notes:
        function need modules:

        - pathlib
        - gzip

        The whole file is read before the function returns; only the stripping
        of each row is lazy.

    Arguments:
        filename (str): a path to an existing file

    Returns:
        generator of :class:`str`: rows without trailing whitespace

    Raises:
         FileNotFoundError: If file `filename` does not exist or is not a regular file

    Example:
        >>> rows = load_in_list("filename")
        >>> list(rows)
        ["i like pears, but apples scare me","i like apples, but pears scare me","End of file"]
    """
    import gzip
    filename = Path(filename)
    # is_file() is already False for a missing path; exists() only selects the message
    if not filename.is_file():
        reason = "does not exist" if not filename.exists() else "is not a valid file"
        raise FileNotFoundError(
                f'ERROR: Yoda_powers.toolbox.load_in_list() file "{filename}" {reason}')

    open_fn = gzip.open if filename.suffix == ".gz" else open
    with open_fn(filename, "rt") as file_in:
        rows = file_in.readlines()
    return (row.rstrip() for row in rows)


def load_in_list_col(filename, col=0, sep="\t"):
    """
    Check that the file exists and return a generator over ONE column of its rows.
    The file can be a gzip file with '.gz' extension.

    Each row is stripped of its trailing whitespace (line break included), split
    on ``sep``, and the field at index ``col`` is kept.

    Notes:
        The whole file is read before the function returns, but rows are split
        lazily: a row without the requested column raises :class:`IndexError`
        while the generator is consumed, not when the function is called.

    Arguments:
        filename (str): a path to an existing file
        col (int, default): the selected column (python index). Default=0
        sep (str, default): the string separator. Default is the tab character

    Returns:
        generator of :class:`str`: the selected field of each row

    Raises:
         FileNotFoundError: If file `filename` does not exist or is not a regular file

    Example:
        >>> rows = load_in_list_col("filename", col=1, sep=";")
        >>> list(rows)
        ["valuecol2_row1","valuecol2_row2","valuecol2_row3"]
    """
    import gzip
    filename = Path(filename)
    # is_file() is already False for a missing path; exists() only selects the message
    if not filename.is_file():
        reason = "does not exist" if not filename.exists() else "is not a valid file"
        raise FileNotFoundError(
                f'ERROR: Yoda_powers.toolbox.load_in_list_col() file "{filename}" {reason}')
    open_fn = gzip.open if filename.suffix == ".gz" else open
    with open_fn(filename, "rt") as file_in:
        rows = file_in.readlines()
    return (row.rstrip().split(sep)[col] for row in rows)


def load_in_dict(filename, sep="\t"):
    """
    Check that the file exists and load it in a :class:`dict`: the first column
    is the key, the other columns are the value.
    File can be a gzip file with '.gz' extension.

    Each row is stripped of its trailing whitespace before being split on ``sep``.
    When a key appears on several rows, only the first row is kept.

    Arguments:
        filename (str): a path to an existing file
        sep (str, default): the string separator. Default is the tab character

    Returns:
        :class:`dict`: for each key, ``None`` when the row has no other column,
        a :class:`str` when it has exactly one, a :class:`list` of :class:`str`
        otherwise

    Raises:
         FileNotFoundError: If file `filename` does not exist or is not a regular file

    Example:
        >>> dico = load_in_dict(filename)
        >>> dico
        {
        "col1": ["col2", "col3"],
        "indiv1": ["valeurcol2", "valeurcol3"],
        "indiv2": ["valeurcol2", "valeurcol3"]
        }
    """
    import gzip
    filename = Path(filename)
    # is_file() is already False for a missing path; exists() only selects the message
    if not filename.is_file():
        reason = "does not exist" if not filename.exists() else "is not a valid file"
        raise FileNotFoundError(
                f'ERROR: Yoda_powers.toolbox.load_in_dict() file "{filename}" {reason}')
    dico_out = {}
    open_fn = gzip.open if filename.suffix == ".gz" else open
    with open_fn(filename, "rt") as file_in:
        for line in file_in:
            key, *values = line.rstrip().split(sep)
            if key in dico_out:
                # first occurrence of a key wins
                continue
            if not values:
                dico_out[key] = None
            elif len(values) == 1:
                dico_out[key] = values[0]
            else:
                dico_out[key] = values
    return dico_out


def load_in_dict_selected(filename, column_key=0, column_value=1, sep="\t"):
    """
    Check that the file exists and load two of its columns in a :class:`dict`:
    one column gives the keys, another one the values.
    File can be a gzip file with '.gz' extension.

    Each row is stripped of its trailing whitespace before being split on ``sep``.
    When a key appears on several rows, only the first row is kept.

    Arguments:
        filename (str): a path to an existing file
        column_key (int, default): the index for dict keys (python index). Default=0
        column_value (int, default): the index for dict value (python index). Default=1
        sep (str, default): the string separator. Default is the tab character

    Returns:
        :class:`dict`: ``{field at column_key: field at column_value}``

    Raises:
         FileNotFoundError: If file `filename` does not exist or is not a regular file
         IndexError: If a row lacks one of the requested columns; the message gives
            the (1-based) line number

    Example:
        >>> dico = load_in_dict_selected(filename, column_key=0, column_value=2)
        >>> dico
        {
        "col1": "col3",
        "indiv1": "valeurcol3",
        "indiv2": "valeurcol3"
        }
    """
    import gzip
    filename = Path(filename)
    # is_file() is already False for a missing path; exists() only selects the message
    if not filename.is_file():
        reason = "does not exist" if not filename.exists() else "is not a valid file"
        raise FileNotFoundError(
                f'ERROR: Yoda_powers.toolbox.load_in_dict_selected() file "{filename}" {reason}')
    dico_out = {}
    open_fn = gzip.open if filename.suffix == ".gz" else open
    try:
        with open_fn(filename, "rt") as file_in:
            # start=1 so num_line is directly the line number shown to the user
            for num_line, line in enumerate(file_in, start=1):
                tab_line = line.rstrip().split(sep)
                key = tab_line[column_key]
                if key not in dico_out:
                    dico_out[key] = tab_line[column_value]
    except IndexError as err:
        raise IndexError(
                f'ERROR: Yoda_powers.toolbox.load_in_dict_selected() please check line {num_line}, no value for column {column_value + 1}') from err

    return dico_out


def load_in_dict_dict(filename, sep="\t"):
    """
    Check that the file exists and load it in a :class:`dict` of :class:`dict`:
    the first row is the header, the first column of the other rows is the key and
    each remaining column is stored under its header name.
    File can be a gzip file with '.gz' extension.

    Each row is stripped of its trailing whitespace before being split on ``sep``.
    When a key appears on several rows, only the first row is kept. Fields beyond
    the header length are ignored.

    Arguments:
        filename (str): a path to an existing file
        sep (str, default): the string separator. Default is the tab character

    Returns:
        :class:`collections.defaultdict` of :class:`collections.OrderedDict`:
        ``{key: {header name: value}}``

    Raises:
         FileNotFoundError: If file `filename` does not exist or is not a regular file
         IndexError: If a row has fewer fields than the header; the message gives
            the line number in the file (header = line 1) and the missing column

    Example:
        >>> dico = load_in_dict_dict(filename)
        >>> dico
        {
        "indiv1": {"headerCol2": "toto", "headerCol3": "tata"},
        "indiv2": {"headerCol2": "tutu", "headerCol3": "titi"},
        "indiv3": {"headerCol2": "tete", "headerCol3": "tyty"},
        }
    """
    import gzip
    from collections import defaultdict, OrderedDict
    filename = Path(filename)
    # is_file() is already False for a missing path; exists() only selects the message
    if not filename.is_file():
        reason = "does not exist" if not filename.exists() else "is not a valid file"
        raise FileNotFoundError(
                f'ERROR: Yoda_powers.toolbox.load_in_dict_dict() file "{filename}" {reason}')
    dico_out = defaultdict(OrderedDict)
    open_fn = gzip.open if filename.suffix == ".gz" else open
    try:
        with open_fn(filename, "rt") as file_in:
            header = file_in.readline().rstrip().split(sep)
            # The header is line 1 of the file, so data rows are numbered from 2
            for num_line, line in enumerate(file_in, start=2):
                tab_line = line.rstrip().split(sep)
                key = tab_line[0]
                if key not in dico_out:
                    for index, head in enumerate(header[1:], start=1):
                        dico_out[key][head] = tab_line[index]
    except IndexError as err:
        raise IndexError(
                f'ERROR: Yoda_powers.toolbox.load_in_dict_dict() please check line {num_line}, no value for column {head}') from err
    return dico_out


#################################################
# CLASS
#################################################

class PrintCol:
    """
    Class that adds methods on top of ``print`` to display colored text.

    Every method prints its argument surrounded by an ANSI escape sequence and
    the reset sequence.

    Example:

    >>> PrintCol.red("I am printed in red")
    I am printed in red

    """
    # ANSI escape sequences; __END resets the terminal attributes
    __RED = '\033[91m'
    __GREEN = '\033[92m'
    __YELLOW = '\033[93m'
    __LIGHT_PURPLE = '\033[94m'
    __PURPLE = '\033[95m'
    __END = '\033[0m'

    @classmethod
    def red(cls, s):
        """Print ``s`` wrapped in the red escape sequence."""
        print(f"{cls.__RED}{s}{cls.__END}")

    @classmethod
    def green(cls, s):
        """Print ``s`` wrapped in the green escape sequence."""
        print(f"{cls.__GREEN}{s}{cls.__END}")

    @classmethod
    def yellow(cls, s):
        """Print ``s`` wrapped in the yellow escape sequence."""
        print(f"{cls.__YELLOW}{s}{cls.__END}")

    @classmethod
    def lightPurple(cls, s):
        """Print ``s`` wrapped in the light purple escape sequence."""
        print(f"{cls.__LIGHT_PURPLE}{s}{cls.__END}")

    @classmethod
    def purple(cls, s):
        """Print ``s`` wrapped in the purple escape sequence."""
        print(f"{cls.__PURPLE}{s}{cls.__END}")


class AutoVivification(dict):
    """
    Implementation of perl's autovivification feature: reading a missing key with
    ``obj[key]`` creates, stores and returns a new nested instance, so nested
    levels can be assigned without being declared first.

    ``key in obj`` and ``obj.get(key)`` do not create anything.

    Example:

    >>> a = AutoVivification()
    >>> a[1][2][3] = 4
    >>> a[1][3][3] = 5
    >>> a[1][2]['test'] = 6
    >>> print(a)
    {1: {2: {3: 4, 'test': 6}, 3: {3: 5}}}

    """

    def __missing__(self, item):
        """Hook called by ``dict.__getitem__`` when ``item`` is absent: create the nested level."""
        # type(self)() keeps subclasses nesting instances of their own type
        value = self[item] = type(self)()
        return value


class Directory(PosixPath):
    """
    Class which derives from PosixPath.
    Checks that the given string is a path to a valid directory and adds helpers
    to list the files/directories it contains.

    All helpers work on the ``path_directory`` attribute (the resolved path).

    Example:
        >>> folder = Directory("./")
        >>> print(folder)
        >>> print(list(folder.list_files))
        >>> for file in folder.list_files_ext([".py"]):
        ...     print(file)
    """

    def __init__(self, path_directory=None):
        """
        Arguments:
            path_directory (str): a path to directory

        Raises:
            NotADirectoryError: If `path_directory` does not exist or is not a directory
        """
        candidate = Path(path_directory)
        # is_dir() is already False for a missing path; exists() only selects the message
        if not candidate.is_dir():
            reason = "does not exist" if not candidate.exists() else "is not a valid directory"
            raise NotADirectoryError(
                    f'ERROR: Yoda_powers.toolbox.Directory() directory "{path_directory}" {reason}')

        self.path_directory = candidate.resolve()
        self.__sep = "\n"
        # NOTE(review): on Python >= 3.12 the path segments appear to be recorded by
        # PurePath.__init__, so calling it without arguments may leave the inherited
        # path value as "." - confirm before relying on inherited Path methods.
        super().__init__()

    @property
    def list_path(self):
        """Generator of files/directories directly included in the folder"""
        return self.path_directory.glob("*")

    @property
    def list_dir(self):
        """Generator of directories directly included in the folder"""
        return (elm for elm in self.path_directory.glob("*") if elm.is_dir())

    @property
    def list_files(self):
        """Generator of files directly included in the folder"""
        return (elm for elm in self.path_directory.glob("*") if elm.is_file())

    def list_files_ext(self, ext=None):
        """Generator of files with the specified extensions, searched recursively
        in the folder and its sub-folders

        Arguments:
            ext (list): a non-empty list of extensions like [".py"]
        Returns:
            generator of :class:`pathlib.Path`: files whose suffix is in ``ext``
        Raises:
            ValueError: If ``ext`` is not a list or is empty
        """
        if not isinstance(ext, list) or not ext:
            raise ValueError(f'ERROR: Yoda_powers.toolbox.directory.list_files_ext() "ext" must be a list not "{ext}"')
        return (elm for elm in self.path_directory.glob("**/*") if (elm.is_file() and elm.suffix in ext))

    def __repr__(self):
        return f"{self.__class__}({self.__dict__})"

    def __str__(self):
        """print format"""
        return f"""
path_directory={self.path_directory}
list_path:\n   - {"   - ".join([f'{elm.name}{self.__sep}' for elm in self.list_path])}
list_dir:\n   - {"   - ".join([f'{elm.name}{self.__sep}' for elm in self.list_dir])}
list_files:\n   - {"   - ".join([f'{elm.name}{self.__sep}' for elm in self.list_files])}
"""