import re
import datetime
import logging
import unicodedata
from pathlib import Path
from typing import List, Tuple, Iterable, Optional, Any
from datamaps.plugins.dft.portfolio import project_data_from_master
from datamaps.process.cleansers import DATE_REGEX_4
from datamaps.core.temporal import Quarter
from openpyxl import load_workbook
# Module-level logger, registered under the name "bcompiler.utils".
# Master.duplicate_keys() uses it to report duplicate (or no duplicate) keys.
logger = logging.getLogger("bcompiler.utils")
class ProjectData:
    """Thin read-only wrapper around the key/value mapping of one project.

    The wrapped mapping is stored as-is (not copied); ``len()`` and ``[]``
    are delegated straight to it.
    """

    # Keys may contain an EN DASH where callers type a plain hyphen; both are
    # resolved once here instead of calling ``unicodedata.lookup`` for every
    # single comparison.
    _EN_DASH = unicodedata.lookup("EN DASH")
    _HYPHEN_MINUS = unicodedata.lookup("HYPHEN-MINUS")

    def __init__(self, d: dict) -> None:
        """Wrap ``d``, a mapping of key -> value for a single project.

        :param d: the project mapping; it is kept by reference.
        """
        self._data = d

    def __len__(self) -> int:
        """Return the number of keys in the wrapped mapping."""
        return len(self._data)

    def __getitem__(self, item):
        """Return the value stored under ``item`` in the wrapped mapping."""
        return self._data[item]

    def key_filter(self, key: str) -> List[Tuple]:
        """Return every ``(k, v)`` pair whose key contains ``key``.

        :param key: text tested with ``in`` against each key.
        :raises KeyError: if no key contains ``key``.
        """
        data = [item for item in self._data.items() if key in item[0]]
        if not data:
            raise KeyError("Sorry, there is no matching data")
        return data

    @classmethod
    def _normalise_key(cls, key, strip: bool = False):
        """Return ``key`` with EN DASH replaced by HYPHEN-MINUS.

        Surrounding whitespace is removed as well when ``strip`` is true.
        Non-string keys are returned unchanged so they can still be compared.
        """
        if not isinstance(key, str):
            return key
        if strip:
            key = key.strip()
        return key.replace(cls._EN_DASH, cls._HYPHEN_MINUS)

    def pull_keys(self, input_iter: Iterable, flat=False) -> List[Any]:
        """Return the entries whose key matches one of the keys in ``input_iter``.

        Keys are compared after replacing EN DASH with HYPHEN-MINUS on both
        sides, so a key typed with a plain hyphen finds a stored key written
        with an EN DASH (and vice versa). With ``flat=True`` surrounding
        whitespace is ignored as well. Results follow the order of
        ``input_iter``; each ``(key, value)`` pair is passed through
        ``_convert_str_date_to_object`` before being returned.

        :param input_iter: the keys to pull; any iterable, consumed once.
        :param flat: when exactly ``True`` return only the values, otherwise
            return ``(key, value)`` tuples carrying the stored key.
        :return: list of values (``flat=True``) or of ``(key, value)`` tuples.
        """
        strip = flat is True
        # Materialise once: the input is scanned for every stored key and
        # indexed for ordering, which a one-shot iterator cannot support.
        wanted = [self._normalise_key(key, strip) for key in input_iter]

        # One entry per (stored key, requested key) match, so a key requested
        # twice is returned twice.
        matched = [
            (normalised, (key, value))
            for key, value in self._data.items()
            for normalised in [self._normalise_key(key, strip)]
            for want in wanted
            if normalised == want
        ]
        # Order by first position in the request; the sort is stable, so ties
        # keep the order of the wrapped mapping.
        matched.sort(key=lambda pair: wanted.index(pair[0]))

        items = [_convert_str_date_to_object(item) for _, item in matched]
        if strip:
            return [item[1] for item in items]
        return items

    def __repr__(self):
        """Identify the instance by the ``id`` of the wrapped mapping."""
        return f"ProjectData() - with data: {id(self._data)}"
def _convert_str_date_to_object(d_str: tuple) -> Tuple[str, Optional[datetime.date]]:
try:
if re.match(DATE_REGEX_4, d_str[1]):
try:
ds = d_str[1].split("-")
return (d_str[0], datetime.date(int(ds[0]), int(ds[1]), int(ds[2])))
except TypeError:
return d_str
else:
return d_str
except TypeError:
return d_str
class Master:
    """A Master object, representing the main central data item in ``datamaps``.

    Args:
        quarter (:py:class:`datamaps.core.temporal.Quarter`): created using
            ``Quarter(1, 2017)`` for example.
        path (str): path to the master xlsx file.
        declared_month (int): optional month number (1-12) used by the
            :py:attr:`month` property.

    A master object is a composition of a ``Quarter`` object and an actual
    master xlsx file on disk. The file is read once, when the object is
    created.

    You create one either by creating the Quarter object first and using that
    as the first parameter of the ``Master`` constructor, e.g.::

        from datamaps.core.temporal import Quarter

        q1 = Quarter(1, 2016)
        m1 = Master(q1, '/tmp/master_1_2016.xlsx')

    or by doing both in one::

        m1 = Master(Quarter(1, 2016), '/tmp/master_1_2016.xlsx')

    Once you have a ``Master`` object, you can access project data from it,
    like this::

        project_data = m1['Project Title']

    The following *attributes* are available on ``m1`` once created, e.g.::

        data = m1.data
        quarter = m1.quarter
        filename = m1.filename
        ..etc
    """

    # Month number -> English month name, shared by every instance.
    _MONTH_NAMES = {
        1: "January",
        2: "February",
        3: "March",
        4: "April",
        5: "May",
        6: "June",
        7: "July",
        8: "August",
        9: "September",
        10: "October",
        11: "November",
        12: "December",
    }

    def __init__(self, quarter: Quarter, path: str, declared_month=None) -> None:
        self._quarter = quarter
        self._declared_month = declared_month
        self.path = path
        # Reads the master file; the result is exposed through ``data``.
        self._data = project_data_from_master(self.path)
        self._project_titles = list(self._data.keys())
        self.year = self._quarter.year

    def __getitem__(self, project_name):
        """Return the data of ``project_name`` wrapped in a ``ProjectData``."""
        return ProjectData(self._data[project_name])

    @property
    def data(self):
        """Return all the data contained in the master in a large, nested dictionary.

        The resulting data structure contains a dictionary of
        :py:class:`collections.OrderedDict` items whose key is the name of a
        project::

            "Project Name": OrderedDict("key": "value"
                                        ...)

        This object can then be further interrogated, for example to obtain
        all key/values from a particular project, by doing::

            d = Master.data
            project_data = d['PROJECT_NAME']
        """
        return self._data

    @property
    def quarter(self):
        """Returns the ``Quarter`` object associated with the ``Master``.

        Example::

            q1 = m.quarter

        ``q1`` can then be further interrogated as documented in
        :py:class:`core.temporal.Quarter`.
        """
        return self._quarter

    @property
    def month(self):
        """Return the month of the quarter that matches ``declared_month``.

        ``declared_month`` is translated to its English name and compared
        with the ``month`` attribute of each entry in ``quarter.months``; the
        ``month`` attribute of the first matching entry is returned.

        Raises:
            KeyError: if ``declared_month`` is not a number from 1 to 12
                (this includes the default, ``None``).
            IndexError: if the declared month is not one of the quarter's
                months.
        """
        declared_name = self._MONTH_NAMES[self._declared_month]
        return [m.month for m in self.quarter.months if m.month == declared_name][0]

    @property
    def filename(self):
        """The filename of the master xlsx file, e.g. ``master_1_2017.xlsx``."""
        return Path(self.path).name

    @property
    def projects(self):
        """A list of project titles derived from the master xlsx."""
        return self._project_titles

    def duplicate_keys(self, to_log=None):
        """Checks for duplicate keys in a master xlsx file.

        The values of the first column of the active worksheet are compared;
        a value that occurs more than once is a duplicate.

        Args:
            to_log (bool): Optional True or False, depending on whether you
                want to see duplicates reported in a ``WARNING`` log message.
                This is used mainly for internal purposes within ``bcompiler``.

        Returns:
            With ``to_log`` truthy: ``True`` if duplicates were found (each
            one is logged as a warning), otherwise ``False``.
            With ``to_log`` falsy: the set of duplicated keys, or ``False``
            if there are none.
        """
        wb = load_workbook(self.path)
        ws = wb.active
        # NOTE(review): empty cells (None) are compared like any other value,
        # so several blank cells in the column count as a duplicate.
        col_a = [cell.value for cell in next(ws.iter_cols())]

        seen: set = set()
        dups: set = set()
        for x in col_a:
            if x in seen:
                dups.add(x)
            else:
                seen.add(x)

        if to_log:
            if dups:
                for x in dups:
                    logger.warning(
                        f'{self.path} contains duplicate key: "{x}". Masters cannot contain duplicate keys. Rename them.'
                    )
                return True
            logger.info(f"No duplicate keys in {self.path}")
            return False
        if dups:
            return dups
        return False

    def __repr__(self):
        return f"Master({self.path}, {self.quarter.quarter}, {self.quarter.year})"