aboutsummaryrefslogtreecommitdiffstats
path: root/datamaps/core
diff options
context:
space:
mode:
Diffstat (limited to 'datamaps/core')
-rw-r--r--datamaps/core/__init__.py1
-rw-r--r--datamaps/core/master.py208
2 files changed, 0 insertions, 209 deletions
diff --git a/datamaps/core/__init__.py b/datamaps/core/__init__.py
index 305208d..2a3c50b 100644
--- a/datamaps/core/__init__.py
+++ b/datamaps/core/__init__.py
@@ -1,3 +1,2 @@
from .row import Row
from .temporal import Quarter, FinancialYear
-from .master import Master, ProjectData
diff --git a/datamaps/core/master.py b/datamaps/core/master.py
deleted file mode 100644
index 1fbfe90..0000000
--- a/datamaps/core/master.py
+++ /dev/null
@@ -1,208 +0,0 @@
-import re
-import datetime
-import logging
-import unicodedata
-from pathlib import Path
-from typing import List, Tuple, Iterable, Optional, Any
-
-from ..utils import project_data_from_master
-from ..process.cleansers import DATE_REGEX_4
-from .temporal import Quarter
-
-from openpyxl import load_workbook
-
-logger = logging.getLogger('bcompiler.utils')
-
-
-class ProjectData:
-    """Wrapper around the key/value mapping of a single project.
-
-    ``d`` is that mapping, e.g. ``project_data_from_master(path)[title]``.
-    """
-    def __init__(self, d: dict) -> None:
-        self._data = d
-
-    def __len__(self) -> int:
-        return len(self._data)
-
-    def __getitem__(self, item):
-        return self._data[item]
-
-    def key_filter(self, key: str) -> List[Tuple]:
-        """Return the ``(k, v)`` pairs whose key contains ``key`` as a substring.
-
-        Raises ``KeyError`` if there is no such pair.
-        """
-        data = [item for item in self._data.items() if key in item[0]]
-        if not data:
-            raise KeyError("Sorry, there is no matching data")
-        return data
-
-    def pull_keys(self, input_iter: Iterable, flat=False) -> List[Tuple[Any, ...]]:
-        """Return the data for the keys in ``input_iter``, in ``input_iter`` order.
-
-        An EN DASH in a stored key is treated as HYPHEN-MINUS when matching,
-        so requested keys should be spelled with HYPHEN-MINUS. Values that
-        match ``DATE_REGEX_4`` are converted to ``datetime.date`` objects.
-
-        Args:
-            input_iter: the keys to pull; any iterable, it is consumed only once.
-            flat: if ``True``, also strip whitespace around stored keys before
-                matching and return only the values, not ``(key, value)`` pairs.
-        """
-        wanted = list(input_iter)  # materialise: it is re-scanned and indexed below
-        en_dash, hyphen = unicodedata.lookup('EN DASH'), unicodedata.lookup('HYPHEN-MINUS')
-
-        def norm(k):
-            return (k.strip() if flat is True else k).replace(en_dash, hyphen)
-
-        xs = [item for item in self._data.items() for i in wanted if norm(item[0]) == i]
-        xs = [_convert_str_date_to_object(x) for x in xs]
-        xs.sort(key=lambda x: wanted.index(norm(x[0])))
-        return [x[1] for x in xs] if flat is True else xs
-
-    def __repr__(self):
-        return f"ProjectData() - with data: {id(self._data)}"
-
-
-def _convert_str_date_to_object(d_str: tuple) -> Tuple[str, Optional[datetime.date]]:
-    """Return ``(key, date)`` if ``d_str[1]`` is a date string, otherwise ``d_str`` unchanged."""
-    try:
-        if not re.match(DATE_REGEX_4, d_str[1]):
-            return d_str
-        ds = d_str[1].split('-')
-        return (d_str[0], datetime.date(int(ds[0]), int(ds[1]), int(ds[2])))
-    except (TypeError, ValueError):
-        # TypeError: the value is not a string. ValueError: the text matched
-        # the pattern but is not a real calendar date (e.g. month 13) or a
-        # part is not an integer. Best effort: hand the pair back untouched.
-        return d_str
-
-
-class Master:
-    """A Master object, representing the main central data item in ``bcompiler``.
-
-    Args:
-        quarter (:py:class:`bcompiler.api.Quarter`): created using ``Quarter(1, 2017)`` for example.
-        path (str): path to the master xlsx file
-
-    A master object is a composition between a :py:class:`bcompiler.api.Quarter` object and an
-    actual master xlsx file on disk.
-
-    You create one, either by creating the Quarter object first, and using that as the first
-    parameter of the ``Master`` constructor, e.g.::
-
-        from bcompiler.api import Quarter
-        from bcompiler.api import Master
-
-        q1 = Quarter(1, 2016)
-        m1 = Master(q1, '/tmp/master_1_2016.xlsx')
-
-    or by doing both in one::
-
-        m1 = Master(Quarter(1, 2016), '/tmp/master_1_2016.xlsx')
-
-    Once you have a ``Master`` object, you can access project data from it, like this::
-
-        project_data = m1['Project Title']
-
-    The following *attributes* are available on ``m1`` once created as such, e.g.::
-
-        data = m1.data
-        quarter = m1.quarter
-        filename = m1.filename
-        ..etc
-    """
-    def __init__(self, quarter: Quarter, path: str) -> None:
-        self._quarter = quarter
-        self.path = path
-        # The xlsx is parsed once, here; ``data`` and ``projects`` reuse the result.
-        self._data = project_data_from_master(self.path)
-        self._project_titles = list(self._data)
-        # Convenience copy of the quarter's year.
-        self.year = self._quarter.year
-
-    def __getitem__(self, project_name):
-        """Return the data for ``project_name`` wrapped in a :py:class:`ProjectData`.
-
-        An unknown title fails the underlying mapping lookup (``KeyError`` for a dict).
-        """
-        return ProjectData(self._data[project_name])
-
-    @property
-    def data(self):
-        """Return all the data contained in the master in a large, nested dictionary.
-
-        The resulting data structure contains a dictionary of :py:class:`collections.OrderedDict` items whose
-        key is the name of a project::
-
-            "Project Name": OrderedDict("key": "value"
-                                        ...)
-
-        This object can then be further interrogated, for example to obtain all key/values
-        from a particular project, by doing::
-
-            d = m1.data
-            project_data = d['PROJECT_NAME']
-
-        """
-        return self._data
-
-    @property
-    def quarter(self):
-        """Returns the ``Quarter`` object associated with the ``Master``.
-
-        Example::
-
-            q1 = m.quarter
-
-        ``q1`` can then be further interrogated as documented in :py:class:`core.temporal.Quarter`.
-
-        """
-        return self._quarter
-
-    @property
-    def filename(self):
-        """The filename of the master xlsx file, e.g. ``master_1_2017.xlsx``."""
-        return Path(self.path).name
-
-    @property
-    def projects(self):
-        """A list of project titles derived from the master xlsx."""
-        return self._project_titles
-
-    def duplicate_keys(self, to_log=None):
-        """Check the first column of the active sheet of the master xlsx for repeated values.
-
-        The workbook is loaded from ``self.path`` on every call.
-
-        Args:
-            to_log (bool): if truthy, report through the module logger (one
-                ``WARNING`` per duplicate, or one ``INFO`` if there are none)
-                and return a bool instead of the duplicates themselves.
-
-        Returns:
-            ``True``/``False`` when ``to_log`` is truthy; otherwise the set
-            of duplicated values, or ``False`` when nothing is duplicated.
-        """
-        wb = load_workbook(self.path)
-        col_a = [cell.value for cell in next(wb.active.iter_cols())]
-        seen: set = set()
-        dups: set = set()
-        for x in col_a:
-            # NOTE(review): presumably empty cells read as None, so several blank
-            # cells in the column would count as a duplicate key - confirm intended.
-            if x in seen:
-                dups.add(x)
-            else:
-                seen.add(x)
-        if to_log:
-            for x in dups:
-                logger.warning(f"{self.path} contains duplicate key: \"{x}\". Masters cannot contain duplicate keys. Rename them.")
-            if not dups:
-                logger.info(f"No duplicate keys in {self.path}")
-            return bool(dups)
-        return dups or False
-
-    def __repr__(self):
-        return f"Master({self.path}, {self.quarter.quarter}, {self.quarter.year})"