#!/usr/bin/env python3
# -*- coding: utf-8 -*-
##################################################
# Modules
##################################################
# Python modules
from pathlib import PosixPath, Path
from datetime import datetime
import sys
##################################################
# Functions
def welcome_args(version_arg, parser_arg):
    """
    Decorator factory printing a start/stop banner around a script entry point.

    Each call of the decorated function parses the command line with
    ``parser_arg``, prints a header (program name, version, start time,
    command line and every parsed argument), runs the function, then prints
    a footer with the stop time and the elapsed run time.

    Arguments:
        version_arg (str): the program version shown in the header
        parser_arg (:class:`argparse.ArgumentParser`): an already built parser;
            its ``prog`` attribute gives the program name and its
            ``parse_args()`` method is called on every run

    Returns:
        function: a decorator to apply to the script entry point. The
        decorated function forwards its arguments and its return value.

    Notes:
        - use it to decorate the ``main()`` of a script which relies on
          :class:`argparse.ArgumentParser`
        - the footer is not printed when the decorated function raises

    Examples:
        >>> from yoda_powers.toolbox import welcome_args
        >>> @welcome_args(version, build_parser())
        ... def main():
        ...     # some code
        >>> main()
        ################################################################################
        #                             prog_name and version                            #
        ################################################################################
        Start time: 16-09-2020 at 14:39:02
        Commande line run: ./filter_mummer.py -l mummer/GUY0011.pp1.fasta.PH0014.pp1.fasta.mum

         - Intput Info:
             - debug: False
             - plot: False
             - scaff_min: 1000000
             - fragments_min: 5000
             - csv_file: blabla
        PROGRAMME CODE HERE
        Stop time: 16-09-2020 at 14:39:02     Run time: 0:00:00.139732
        ################################################################################
        #                               End of execution                               #
        ################################################################################
    """
    from functools import wraps

    def welcome(func):
        # wraps() keeps the name/docstring of the decorated function
        @wraps(func)
        def wrapper(*args, **kwargs):
            start_time = datetime.now()
            parsed_args = parser_arg.parse_args()
            banner = "#" * 80
            title = f"{Path(parser_arg.prog).stem} {version_arg}"
            command_line = " ".join(sys.argv)

            # Welcome message
            print(
                f"{banner}\n#{title:^78}#\n{banner}\n"
                f"Start time: {start_time:%d-%m-%Y at %H:%M:%S}\n"
                f"Commande line run: {command_line}\n"
            )
            # Summary of the parsed arguments for the user
            print(" - Intput Info:")
            for key, value in vars(parsed_args).items():
                print(f"\t - {key}: {value}")
            print("\n")

            result = func(*args, **kwargs)

            # A single timestamp so that stop time and run time agree
            stop_time = datetime.now()
            print(
                f"\nStop time: {stop_time:%d-%m-%Y at %H:%M:%S}"
                f"\tRun time: {stop_time - start_time}\n"
                f"{banner}\n#{'End of execution':^78}#\n{banner}"
            )
            return result

        return wrapper

    return welcome
def compare_list(list1, list2):
    """
    Compare two lists and split their elements into shared and exclusive ones.

    Both inputs are turned into sets, so duplicates are dropped. Each
    result is sorted with ``sort_human`` as the sort key.

    Arguments:
        list1 (list): the first python :class:`list`
        list2 (list): the second python :class:`list`

    Returns:
        tuple: three lists ``(common, u1, u2)``

            - common: the elements present in both lists,
            - u1: the elements only present in list1,
            - u2: the elements only present in list2

    Notes:
        Reminder on set operators::

            ens1 = set([1, 2, 3, 4, 5, 6])
            ens2 = set([2, 3, 4])
            ens3 = set([6, 7, 8, 9])
            ens1 & ens2  ->  {2, 3, 4}  the elements found in both ens1 and ens2
            ens1 | ens3  ->  {1, 2, 3, 4, 5, 6, 7, 8, 9}  the union of both
            ens1 & ens3  ->  {6}  same reason as two lines above
            ens1 ^ ens3  ->  {1, 2, 3, 4, 5, 7, 8, 9}  the union minus the common elements
            ens1 - ens2  ->  {1, 5, 6}  the elements of ens2 are removed

    Examples:
        >>> l1 = [1, 2, 3, 4, 5, 6]
        >>> l2 = [6, 7, 8, 9]
        >>> com, u1, u2 = compare_list(l1, l2)
        >>> print(com)
        [6]
        >>> print(u1)
        [1, 2, 3, 4, 5]
        >>> print(u2)
        [7, 8, 9]
    """
    first, second = set(list1), set(list2)

    shared = sorted(first.intersection(second), key=sort_human)
    only_first = sorted(first.difference(second), key=sort_human)
    only_second = sorted(second.difference(first), key=sort_human)

    return shared, only_first, only_second
def existant_file(path):
    """
    'Type' callable for argparse: validate that ``path`` is an existing file.

    Notes:
        function need modules:

        - pathlib
        - argparse

    Arguments:
        path (str): a path to an existing file

    Returns:
        :class:`PosixPath`: ``Path(path).resolve()``

    Raises:
        ArgumentTypeError: If file `path` does not exist.
        ArgumentTypeError: If `path` exists but is not a regular file.

    Examples:
        >>> import argparse
        >>> parser = argparse.ArgumentParser(prog='test.py', description='''This is demo''')
        >>> parser.add_argument('-f', '--file', metavar="<path/to/file>", type=existant_file, required=True,
        ...                     dest='path_file', help='path to file')
    """
    from argparse import ArgumentTypeError
    from pathlib import Path

    candidate = Path(path)
    if candidate.exists():
        if candidate.is_file():
            return candidate.resolve()
        raise ArgumentTypeError(f'ERROR: "{path}" is not a valid file')
    # argparse turns ArgumentTypeError into a rejection message such as:
    #   error: argument input: x does not exist
    raise ArgumentTypeError(f'ERROR: file "{path}" does not exist')
def max_key_dict(dico):
    """
    Return the key associated with the largest value of ``dico``.

    Arguments:
        dico (:obj:`dict`): a python :class:`dict`

    Returns:
        the key of the dict holding the maximum value (the first one met
        while iterating when several keys share that value)

    Raises:
        ValueError: If `dico` is empty (raised by :func:`max`).

    Example:
        >>> dico = {"A":0.5, "C":0.7, "T":0.01, "G":0.9}
        >>> key_max = max_key_dict(dico)
        >>> print(key_max)
        G
    """
    best_key = max(dico, key=lambda candidate: dico.get(candidate))
    return best_key
def sort_human(in_list, _nsre=None):
    """
    Sort key giving the alpha/digit order that humans expect.

    Despite its parameter name, the function receives ONE element of the
    list to sort. Use it as ``list.sort(key=sort_human)`` or
    ``sorted(list, key=sort_human)``.

    The element is split on runs of digits; numeric chunks are converted
    to :class:`int` and the other chunks are lower-cased, so that
    "something2" sorts before "something17".

    Arguments:
        in_list (str): one element of the list being sorted
        _nsre (:obj:`re.Pattern`, optional): the expression used to split the
            element, defaults to ``re.compile('([0-9]+)')``

    Returns:
        list: the chunks of the element (``int`` for numeric chunks, lower-cased
        ``str`` otherwise). When the element cannot be split (it is not a
        string), the element itself is returned unchanged.

    Warns:
        SyntaxWarning: If the element cannot be split and is not an ``int``.

    Example:
        >>> list_to_sorted = ["something1","something32","something17","something2","something29","something24"]
        >>> print(sorted(list_to_sorted, key=sort_human))
        ['something1', 'something2', 'something17', 'something24', 'something29', 'something32']
        >>> list_to_sorted.sort(key=sort_human)
        >>> print(list_to_sorted)
        ['something1', 'something2', 'something17', 'something24', 'something29', 'something32']
    """
    from warnings import warn
    import re

    if not _nsre:
        _nsre = re.compile('([0-9]+)')
    try:
        chunks = re.split(_nsre, in_list)
    except TypeError:
        # Not a string: plain integers are silently accepted, anything else is reported
        if not isinstance(in_list, int):
            warn(
                f"Yoda_powers::sort_human : element '{in_list}' on the list not understand so don't sort this element\n",
                SyntaxWarning, stacklevel=2)
        return in_list
    # isdecimal() and not isdigit(): isdigit() is also True for characters such
    # as "²" which int() rejects with a ValueError.
    return [int(chunk) if chunk.isdecimal() else chunk.lower() for chunk in chunks]
def readable_dir(prospective_dir):
    """
    'Type' callable for argparse: validate that a path is a readable directory.

    Notes:
        function need modules:

        - pathlib
        - argparse
        - os

    Arguments:
        prospective_dir (str): a path to an existing directory

    Returns:
        :class:`PosixPath`: ``Path(prospective_dir).resolve()``

    Raises:
        ArgumentTypeError: If `prospective_dir` does not exist.
        ArgumentTypeError: If `prospective_dir` is not a directory.
        ArgumentTypeError: If `prospective_dir` is not readable.

    Examples:
        >>> import argparse
        >>> parser = argparse.ArgumentParser(prog='test.py', description='''This is demo''')
        >>> parser.add_argument('-d', '--dir', metavar="<path/to/dir>", type=readable_dir, required=True,
        ...                     dest='path_dir', help='path to directory')
    """
    from argparse import ArgumentTypeError
    from pathlib import Path
    import os

    candidate = Path(prospective_dir)
    # argparse turns ArgumentTypeError into a rejection message such as:
    #   error: argument input: x does not exist
    if not candidate.exists():
        raise ArgumentTypeError(f'ERROR: directory "{prospective_dir}" does not exist')
    if not candidate.is_dir():
        raise ArgumentTypeError(f'ERROR: "{prospective_dir}" is not a valid directory')
    if not os.access(prospective_dir, os.R_OK):
        raise ArgumentTypeError(f'ERROR: "{prospective_dir}" is not a readable dir')
    return candidate.resolve()
def replace_all(repls, str):
    """
    Replace, in a single pass, every key of ``repls`` found in ``str`` by its value.

    All the keys are searched simultaneously, so a replacement is never
    replaced again (swapping two words works as expected).

    :param repls: a python dictionary mapping the text to find to its replacement
    :type repls: dict()
    :param str: the string in which some words are replaced
    :type str: str()
    :rtype: str()
    :return: - the input text where each 'Key' of the dictionary is replaced by its 'Value';
             the text is returned unchanged when the dictionary is empty

    Example:
        >>> text = "i like apples, but pears scare me"
        >>> print(replace_all({"apple": "pear", "pear": "apple"}, text))
        i like pears, but apples scare me
    """
    import re

    # An empty mapping would build the pattern '' which matches everywhere and
    # makes the lookup below fail with KeyError(''): nothing to replace.
    if not repls:
        return str
    pattern = '|'.join(re.escape(key) for key in repls)
    return re.sub(pattern, lambda match: repls[match.group(0)], str)
def load_in_list(filename):
    """
    Read a text file and return a generator over its rows, right-stripped.

    The file may be gzip compressed when its extension is '.gz'. The whole
    file is read before the function returns; only the stripping of each
    row is done lazily.

    Notes:
        function need modules:

        - pathlib
        - gzip

    Arguments:
        filename (str): a path to an existing file

    Returns:
        generator of :class:`str`: the rows without trailing whitespace / line break

    Raises:
        FileNotFoundError: If file `filename` does not exist or is not a valid file

    Example:
        >>> rows = load_in_list("filename")
        >>> list(rows)
        ["i like pears, but apples scare me","i like apples, but pears scare me","End of file"]
    """
    from pathlib import Path
    import gzip

    source = Path(filename)
    if not (source.exists() and source.is_file()):
        reason = "does not exist" if not source.exists() else "is not a valid file"
        raise FileNotFoundError(
            f'ERROR: Yoda_powers.toolbox.load_in_list() file "{source}" {reason}')

    opener = gzip.open if source.suffix == ".gz" else open
    with opener(source, "rt") as handle:
        raw_rows = handle.readlines()
    return (raw.rstrip() for raw in raw_rows)
def load_in_list_col(filename, col=0, sep="\t"):
    """
    Read a delimited text file and return a generator over one of its columns.

    The file may be gzip compressed when its extension is '.gz'. The whole
    file is read before the function returns; each row is stripped and
    split lazily, while the generator is consumed.

    Arguments:
        filename (str): a path to an existing file
        col (int, default): the selected column (python index). Default=0
        sep (str, default): the string separator. Default is a tab

    Returns:
        generator of :class:`str`: the value of the selected column for each row

    Raises:
        FileNotFoundError: If file `filename` does not exist or is not a valid file
        IndexError: while iterating, if a row has no column `col`

    Example:
        >>> rows = load_in_list_col("filename", col=1, sep=";")
        >>> list(rows)
        ["i like pears, but apples scare me","i like apples, but pears scare me","End of file"]
    """
    from pathlib import Path
    import gzip

    source = Path(filename)
    if not (source.exists() and source.is_file()):
        reason = "does not exist" if not source.exists() else "is not a valid file"
        raise FileNotFoundError(
            f'ERROR: Yoda_powers.toolbox.load_in_list_col() file "{source}" {reason}')

    opener = gzip.open if source.suffix == ".gz" else open
    with opener(source, "rt") as handle:
        raw_rows = handle.readlines()
    return (raw.rstrip().split(sep)[col] for raw in raw_rows)
def load_in_dict(filename, sep="\t"):
    """
    Load a delimited text file into a :class:`dict` keyed on the first column.

    The file may be gzip compressed when its extension is '.gz'.
    For each row the value depends on the number of remaining columns:
    ``None`` when there is none, the single :class:`str` when there is one,
    a :class:`list` of :class:`str` otherwise. When a key appears on several
    rows, only its first row is kept.

    Arguments:
        filename (str): a path to an existing file
        sep (str, default): the string separator. Default is a tab

    Returns:
        :class:`dict`: a python :class:`dict` of file

    Raises:
        FileNotFoundError: If file `filename` does not exist or is not a valid file

    Example:
        >>> dico = load_in_dict(filename)
        >>> dico
        {
        "col1": ["col2","col3"],
        "indiv1": ["valeurcol2","valeurcol3"],
        "indiv2": ["valeurcol2","valeurcol3"]
        }
    """
    from pathlib import Path
    import gzip

    source = Path(filename)
    if not (source.exists() and source.is_file()):
        reason = "does not exist" if not source.exists() else "is not a valid file"
        raise FileNotFoundError(
            f'ERROR: Yoda_powers.toolbox.load_in_dict() file "{source}" {reason}')

    table = {}
    opener = gzip.open if source.suffix == ".gz" else open
    with opener(source, "rt") as handle:
        for row in handle:
            key, *others = row.rstrip().split(sep)
            if key in table:
                # first occurrence of a key wins
                continue
            if not others:
                table[key] = None
            elif len(others) == 1:
                table[key] = others[0]
            else:
                table[key] = others
    return table
def load_in_dict_selected(filename, column_key=0, column_value=1, sep="\t"):
    """
    Load two columns of a delimited text file into a :class:`dict`.

    One column gives the keys and another one gives the values. The file
    may be gzip compressed when its extension is '.gz'. When a key appears
    on several rows, only its first row is kept.

    Arguments:
        filename (str): a path to an existing file
        column_key (int, default): the index for dict keys (python index). Default=0
        column_value (int, default): the index for dict value (python index). Default=1
        sep (str, default): the string separator. Default is a tab

    Returns:
        :class:`dict`: a python :class:`dict` mapping the key column to the value column

    Raises:
        FileNotFoundError: If file `filename` does not exist or is not a valid file
        IndexError: If a row has no key column or no value column; the message
            gives the line number (starting at 1) and the missing column

    Example:
        >>> dico = load_in_dict_selected(filename, column_key=0, column_value=2)
        >>> dico
        {
        "col1": "col3",
        "indiv1": "valeurcol3",
        "indiv2": "valeurcol3"
        }
    """
    from pathlib import Path
    import gzip

    filename = Path(filename)
    if not filename.exists() or not filename.is_file():
        reason = "does not exist" if not filename.exists() else "is not a valid file"
        raise FileNotFoundError(
            f'ERROR: Yoda_powers.toolbox.load_in_dict_selected() file "{filename}" {reason}')

    dico_out = {}
    open_fn = gzip.open if filename.suffix == ".gz" else open
    try:
        with open_fn(filename, "rt") as file_in:
            for num_line, line in enumerate(file_in, start=1):
                tab_line = line.rstrip().split(sep)
                # Track the column being read so the error names the right one
                missing_column = column_key
                key = tab_line[column_key]
                if key not in dico_out:
                    missing_column = column_value
                    dico_out[key] = tab_line[column_value]
    except IndexError as err:
        raise IndexError(
            f'ERROR: Yoda_powers.toolbox.load_in_dict_selected() please check line {num_line}, no value for column {missing_column + 1}') from err
    return dico_out
def load_in_dict_dict(filename, sep="\t"):
    """
    Load a delimited text file with a header row into a dict of dicts.

    The first row is the header. For every other row, the first column is
    the outer key and each remaining header name is mapped to the value
    found in the same column. The file may be gzip compressed when its
    extension is '.gz'. When a key appears on several rows, only its first
    row is kept. Columns beyond the header length are ignored.

    Arguments:
        filename (str): a path to an existing file
        sep (str, default): the string separator. Default is a tab

    Returns:
        :class:`collections.defaultdict`: a ``defaultdict(OrderedDict)`` of the
        file. Being a defaultdict, reading an unknown key creates an empty entry.

    Raises:
        FileNotFoundError: If file `filename` does not exist or is not a valid file
        IndexError: If a row has fewer columns than the header; the message gives
            the line number in the file (header is line 1) and the header name

    Example:
        >>> dico = load_in_dict_dict(filename)
        >>> dico
        {
        "indiv1": {"headerCol2":"toto","headerCol3":"tata"},
        "indiv2": {"headerCol2":"tutu","headerCol3":"titi"},
        "indiv3": {"headerCol2":"tete","headerCol3":"tyty"},
        }
    """
    from pathlib import Path
    import gzip
    from collections import defaultdict, OrderedDict

    filename = Path(filename)
    if not filename.exists() or not filename.is_file():
        reason = "does not exist" if not filename.exists() else "is not a valid file"
        raise FileNotFoundError(
            f'ERROR: Yoda_powers.toolbox.load_in_dict_dict() file "{filename}" {reason}')

    dico_out = defaultdict(OrderedDict)
    open_fn = gzip.open if filename.suffix == ".gz" else open
    try:
        with open_fn(filename, "rt") as file_in:
            header = file_in.readline().rstrip().split(sep)
            # start=2: the header already consumed line 1 of the file
            for num_line, line in enumerate(file_in, start=2):
                tab_line = line.rstrip().split(sep)
                if tab_line[0] not in dico_out:
                    # start=1: header[1:] lines up with tab_line[1:]
                    for index, head in enumerate(header[1:], start=1):
                        dico_out[tab_line[0]][head] = tab_line[index]
    except IndexError as err:
        raise IndexError(
            f'ERROR: Yoda_powers.toolbox.load_in_dict_dict() please check line {num_line}, no value for column {head}') from err
    return dico_out
#################################################
# CLASS
#################################################
class PrintCol:
    """
    Class adding print-like methods which display the text in colour.

    Each class method prints its argument surrounded by an ANSI colour
    escape sequence and the ANSI reset sequence.

    Example:
        >>> PrintCol.red("I am printed in red")
        I am printed in red
    """

    # ANSI escape sequences; the double underscore keeps them private to the class
    __END = '\033[0m'
    __RED = '\033[91m'
    __GREEN = '\033[92m'
    __YELLOW = '\033[93m'
    __LIGHT_PURPLE = '\033[94m'
    __PURPLE = '\033[95m'

    @classmethod
    def __paint(cls, colour_code, s):
        """Print ``s`` between ``colour_code`` and the reset sequence."""
        print(f"{colour_code}{s}{cls.__END}")

    @classmethod
    def red(cls, s):
        """Print ``s`` in red."""
        cls.__paint(cls.__RED, s)

    @classmethod
    def green(cls, s):
        """Print ``s`` in green."""
        cls.__paint(cls.__GREEN, s)

    @classmethod
    def yellow(cls, s):
        """Print ``s`` in yellow."""
        cls.__paint(cls.__YELLOW, s)

    @classmethod
    def lightPurple(cls, s):
        """Print ``s`` in light purple."""
        cls.__paint(cls.__LIGHT_PURPLE, s)

    @classmethod
    def purple(cls, s):
        """Print ``s`` in purple."""
        cls.__paint(cls.__PURPLE, s)
class AutoVivification(dict):
    """
    Dictionary implementing perl's autovivification feature.

    Reading a missing key creates, stores and returns a new empty instance
    of the same class, so nested levels can be assigned without being
    initialised first.

    Example:
        >>> a = AutoVivification()
        >>> a[1][2][3] = 4
        >>> a[1][3][3] = 5
        >>> a[1][2]['test'] = 6
        >>> print(a)
        {1: {2: {3: 4, 'test': 6}, 3: {3: 5}}}
    """

    def __getitem__(self, item):
        """Return ``self[item]``, creating an empty nested level when it is missing."""
        try:
            found = super().__getitem__(item)
        except KeyError:
            # Missing key: build a new level of the same type and remember it
            found = type(self)()
            self[item] = found
        return found
class Directory(PosixPath):
    """
    Class which derives from PosixPath.

    Checks that the given string is a path to a valid directory and adds
    helpers listing the files/directories it contains. The helpers work on
    ``path_directory``, the resolved (absolute) version of the given path.

    Example:
        >>> dir = Directory("./")
        >>> print(dir)
        >>> print(dir.list_files)
        >>> for file in dir.list_files_ext([".py"]):
        ...     print(file)
    """

    def __init__(self, path_directory=None):
        """
        Arguments:
            path_directory (str): a path to directory

        Raises:
            NotADirectoryError: If `path_directory` does not exist or is not a directory
        """
        from pathlib import Path
        import sys

        target = Path(path_directory)
        if not target.exists() or not target.is_dir():
            reason = "does not exist" if not target.exists() else "is not a valid directory"
            raise NotADirectoryError(
                f'ERROR: Yoda_powers.toolbox.Directory() directory "{path_directory}" {reason}')
        self.path_directory = target.resolve()
        self.__sep = "\n"
        if sys.version_info >= (3, 12):
            # Since Python 3.12 the path segments are stored by PurePath.__init__;
            # calling it without argument would make this object point to ".".
            super().__init__(path_directory)
        else:
            # Before 3.12 the segments are handled by __new__ and the parent
            # __init__ is object.__init__, which accepts no argument.
            super().__init__()

    @property
    def list_path(self):
        """Generator of the files/directories directly inside the folder"""
        return self.path_directory.glob("*")

    @property
    def list_dir(self):
        """Generator of the directories directly inside the folder"""
        return (elm for elm in self.path_directory.glob("*") if elm.is_dir())

    @property
    def list_files(self):
        """Generator of the files directly inside the folder"""
        return (elm for elm in self.path_directory.glob("*") if elm.is_file())

    def list_files_ext(self, ext=None):
        """Generator of the files with one of the given extensions.

        The search is recursive: files in sub-directories are included.

        Arguments:
            ext (list): a non-empty list of extensions like [".py"]

        Returns:
            generator of :class:`PosixPath`: the files whose suffix is in `ext`

        Raises:
            ValueError: If `ext` is not a list or is empty
        """
        if not isinstance(ext, list) or not ext:
            raise ValueError(f'ERROR: Yoda_powers.toolbox.directory.list_files_ext() "ext" must be a list not "{ext}"')
        return (elm for elm in self.path_directory.glob("**/*") if (elm.is_file() and elm.suffix in ext))

    def __bullets(self, paths):
        """Format the names of `paths` as the ' - ' separated list used by __str__."""
        return " - ".join([f"{elm.name}{self.__sep}" for elm in paths])

    def __repr__(self):
        return f"{self.__class__}({self.__dict__})"

    def __str__(self):
        """print format"""
        return (
            f"\npath_directory={self.path_directory}\n"
            f"list_path:\n - {self.__bullets(self.list_path)}\n"
            f"list_dir:\n - {self.__bullets(self.list_dir)}\n"
            f"list_files:\n - {self.__bullets(self.list_files)}\n"
        )