#! /usr/bin/env python # -*- coding: utf-8 -*- ############################################################################## ## DendroPy Phylogenetic Computing Library. ## ## Copyright 2010-2015 Jeet Sukumaran and Mark T. Holder. ## All rights reserved. ## ## See "LICENSE.rst" for terms and conditions of usage. ## ## If you use this work or any portion thereof in published work, ## please cite it as: ## ## Sukumaran, J. and M. T. Holder. 2010. DendroPy: a Python library ## for phylogenetic computing. Bioinformatics 26: 1569-1571. ## ############################################################################## """ Various text-manipulating and formatting utilities. """ import re import sys import time import itertools import locale import codecs ############################################################################### ## Cross-version compatibility try: from StringIO import StringIO # Python 2 legacy support: StringIO in this module is the one needed (not io) except ImportError: from io import StringIO # Python 3 ############################################################################### ## Unicode/String Conversions try: ENCODING = locale.getdefaultlocale()[1] except ValueError: ENCODING = None # let default value be assigned below if ENCODING == None: ENCODING = 'UTF-8' def bytes_to_text(s): """ Converts a byte string (as read from, e.g., standard input) to a text string. In Python 3, this is from type ``bytes`` to ``str``. In Python 2, this is, confusingly, from type ``str`` to ``unicode``. """ s = codecs.decode(s, ENCODING) if sys.hexversion < 0x03000000: s = codecs.encode(s, "utf-8") return s def parse_curie_standard_qualified_name(prefixed_name, sep=":"): if sep not in prefixed_name: raise ValueError("'{}' is not a valid CURIE-standard qualified name".format(prefixed_name)) return prefixed_name.split(":", 1) ## From: # The Peyotl module of the Open Tree of Life Project # Mark T. Holder # https://github.com/mtholder/peyotl # https://github.com/mtholder/peyotl/blob/c3a544211edc669e664bae28095d52cecfa004f3/peyotl/utility/str_util.py#L5-L25 if sys.version_info.major == 2: def is_str_type(x): return isinstance(x, basestring) else: def is_str_type(x): return isinstance(x, str) ############################################################################### ## def camel_case(s): components = s.split('_') return components[0] + "".join(x.title() for x in components[1:]) def snake_case(name): s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name) return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() ############################################################################### ## def unique_taxon_label_map(taxa, taxon_label_map=None, max_label_len=0, logger=None): """ Given a list of taxa, returns a dictionary with the Taxon objects as keys and string labels as values, where the labels are guaranteed to be unique. If ``taxon_label_map`` is pre-populated (as : 'label'), then those labels will be used as the basis for the label composition, otherwise the original taxon object label will be used. ``max_label_len`` can be used to restrict the maximum length of the labels. """ if taxon_label_map is None: taxon_label_map = {} for t in taxa: taxon_label_map[t] = t.label labels = [] for t in taxon_label_map: label = taxon_label_map[t] idx = 1 if label in labels: candidate_label = label while candidate_label in labels: idx += 1 if max_label_len > 0: k = max_label_len - len(str(idx)) if k < 1: raise ValueError("Unable to make labels unique with maximum label length of %d" % max_label_len) candidate_label = label[:k] + str(idx) else: candidate_label = label + str(idx) label = candidate_label labels.append(label) taxon_label_map[t] = label return taxon_label_map ############################################################################### ## def format_dict_table(*args, **kwargs): """ Returns a (single) string representation of a tuple of dictionaries in a table format. This method can read the column names directly off the dictionary keys, but if a tuple of these keys is provided in the 'column_names' variable, then the order of column_names will follow the order of the fields/keys in that variable. """ display = format_dict_table_rows(*args, **kwargs) if display: return "\n".join(display) else: return "" def format_dict_table_rows(rows, column_names=None, max_column_width=None, border_style=2): """ Returns a string representation of a tuple of dictionaries in a table format. This method can read the column names directly off the dictionary keys, but if a tuple of these keys is provided in the 'column_names' variable, then the order of column_names will follow the order of the fields/keys in that variable. """ if column_names or len(rows) > 0: lengths = {} rules = {} if column_names: column_list = column_names else: try: column_list = rows[0].keys() except: column_list = None if column_list: # characters that make up the table rules border_style = int(border_style) #border_style = 0 if border_style == 0: vertical_rule = ' ' horizontal_rule = '' rule_junction = '' elif border_style == 1: vertical_rule = ' ' horizontal_rule = '-' rule_junction = '-' else: vertical_rule = ' | ' horizontal_rule = '-' rule_junction = '-+-' if border_style >= 3: left_table_edge_rule = '| ' right_table_edge_rule = ' |' left_table_edge_rule_junction = '+-' right_table_edge_rule_junction = '-+' else: left_table_edge_rule = '' right_table_edge_rule = '' left_table_edge_rule_junction = '' right_table_edge_rule_junction = '' if max_column_width: column_list = [c[:max_column_width] for c in column_list] trunc_rows = [] for row in rows: new_row = {} for k in row.keys(): new_row[k[:max_column_width]] = str(row[k])[:max_column_width] trunc_rows.append(new_row) rows = trunc_rows for col in column_list: rls = [len(str(row[col])) for row in rows] lengths[col] = max(rls+[len(col)]) rules[col] = horizontal_rule*lengths[col] template_elements = ["%%(%s)-%ss" % (col, lengths[col]) for col in column_list] row_template = vertical_rule.join(template_elements) border_template = rule_junction.join(template_elements) full_line = left_table_edge_rule_junction + (border_template % rules) + right_table_edge_rule_junction display = [] if border_style > 0: display.append(full_line) display.append(left_table_edge_rule + (row_template % dict(zip(column_list, column_list))) + right_table_edge_rule) if border_style > 0: display.append(full_line) for row in rows: display.append(left_table_edge_rule + (row_template % row) + right_table_edge_rule) if border_style > 0: display.append(full_line) return display else: return '' else: return ''