import re import datetime import logging import unicodedata from pathlib import Path from typing import List, Tuple, Iterable, Optional, Any from ..utils import project_data_from_master from ..process.cleansers import DATE_REGEX_4 from .temporal import Quarter from openpyxl import load_workbook logger = logging.getLogger('bcompiler.utils') class ProjectData: """ ProjectData class """ def __init__(self, d: dict) -> None: """ :py:func:`OrderedDict` is easiest to get from project_data_from_master[x] """ self._data = d def __len__(self) -> int: return len(self._data) def __getitem__(self, item): return self._data[item] def key_filter(self, key: str) -> List[Tuple]: """ Return a list of (k, v) tuples if k in master key. """ data = [item for item in self._data.items() if key in item[0]] if not data: raise KeyError("Sorry, there is no matching data") return (data) def pull_keys(self, input_iter: Iterable, flat=False) -> List[Tuple[Any, ...]]: """ Returns a list of (key, value) tuples from ProjectData if key matches a key. The order of tuples is based on the order of keys passed in the iterable. """ if flat is True: # search and replace troublesome EN DASH character xs = [item for item in self._data.items() for i in input_iter if item[0].strip().replace(unicodedata.lookup('EN DASH'), unicodedata.lookup('HYPHEN-MINUS')) == i] xs = [_convert_str_date_to_object(x) for x in xs] ts = sorted(xs, key=lambda x: input_iter.index(x[0].strip().replace(unicodedata.lookup('EN DASH'), unicodedata.lookup('HYPHEN-MINUS')))) ts = [item[1] for item in ts] return ts else: xs = [item for item in self._data.items() for i in input_iter if item[0].replace(unicodedata.lookup('EN DASH'), unicodedata.lookup('HYPHEN-MINUS')) == i] xs = [item for item in self._data.items() for i in input_iter if item[0] == i] xs = [_convert_str_date_to_object(x) for x in xs] ts = sorted(xs, key=lambda x: input_iter.index(x[0].replace(unicodedata.lookup('EN DASH'), unicodedata.lookup('HYPHEN-MINUS')))) return ts def __repr__(self): return f"ProjectData() - with data: {id(self._data)}" def _convert_str_date_to_object(d_str: tuple) -> Tuple[str, Optional[datetime.date]]: try: if re.match(DATE_REGEX_4, d_str[1]): try: ds = d_str[1].split('-') return (d_str[0], datetime.date(int(ds[0]), int(ds[1]), int(ds[2]))) except TypeError: return d_str else: return d_str except TypeError: return d_str class Master: """A Master object, representing the main central data item in ``bcompiler``. Args: quarter (:py:class:`bcompiler.api.Quarter`): creating using ``Quarter(1, 2017)`` for example. path (str): path to the master xlsx file A master object is a composition between a :py:class:`bcompiler.api.Quarter` object and an actual master xlsx file on disk. You create one, either by creating the Quarter object first, and using that as the first parameter of the ``Master`` constructor, e.g.:: from bcompiler.api import Quarter from bcompiler.api import Master q1 = Quarter(1, 2016) m1 = Master(q1, '/tmp/master_1_2016.xlsx') or by doing both in one:: m1 = Master(Quarter(1, 2016), '/tmp/master_1_2016.xlsx') Once you have a ``Master`` object, you can access project data from it, like this:: project_data = m1['Project Title'] The following *attributes* are available on `m1` once created as such, e.g.:: data = m1.data quarter = m1.quarter filename = m1.filename ..etc """ def __init__(self, quarter: Quarter, path: str) -> None: self._quarter = quarter self.path = path self._data = project_data_from_master(self.path) self._project_titles = [item for item in self.data.keys()] self.year = self._quarter.year def __getitem__(self, project_name): return ProjectData(self._data[project_name]) @property def data(self): """Return all the data contained in the master in a large, nested dictionary. The resulting data structure contains a dictionary of :py:class:`colletions.OrderedDict` items whose key is the name of a project:: "Project Name": OrderedDict("key": "value" ...) This object can then be further interrogated, for example to obtain all key/values from a partictular project, by doing:: d = Master.data project_data = d['PROJECT_NAME'] """ return self._data @property def quarter(self): """Returns the ``Quarter`` object associated with the ``Master``. Example:: q1 = m.quarter ``q1`` can then be further interrogated as documented in :py:class:`core.temporal.Quarter`. """ return self._quarter @property def filename(self): """The filename of the master xlsx file, e.g. ``master_1_2017.xlsx``. """ p = Path(self.path) return p.name @property def projects(self): """A list of project titles derived from the master xlsx. """ return self._project_titles def duplicate_keys(self, to_log=None): """Checks for duplicate keys in a master xlsx file. Args: to_log (bool): Optional True or False, depending on whether you want to see duplicates reported in a ``WARNING`` log message. This is used mainly for internal purposes within ``bcompiler``. Returns: duplicates (set): a set of duplicated keys """ wb = load_workbook(self.path) ws = wb.active col_a = next(ws.iter_cols()) col_a = [item.value for item in col_a] seen: set = set() uniq = [] dups: set = set() for x in col_a: if x not in seen: uniq.append(x) seen.add(x) else: dups.add(x) if to_log and len(dups) > 0: for x in dups: logger.warning(f"{self.path} contains duplicate key: \"{x}\". Masters cannot contain duplicate keys. Rename them.") return True elif to_log and len(dups) == 0: logger.info(f"No duplicate keys in {self.path}") return False elif len(dups) > 0: return dups else: return False def __repr__(self): return f"Master({self.path}, {self.quarter.quarter}, {self.quarter.year})"