1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
|
import re
import datetime
import logging
import unicodedata
from pathlib import Path
from typing import List, Tuple, Iterable, Optional, Any
from datamaps.plugins.dft.portfolio import project_data_from_master
from datamaps.process.cleansers import DATE_REGEX_4
from datamaps.core.temporal import Quarter
from openpyxl import load_workbook
# Module-level logger shared by the classes below.
# NOTE(review): the logger name is hard-coded to 'bcompiler.utils' rather than
# derived from this module's name — presumably kept for compatibility with
# existing logging configuration; confirm before changing.
logger = logging.getLogger('bcompiler.utils')
class ProjectData:
    """Thin wrapper around the key/value mapping of a single project.

    The wrapped mapping is easiest to obtain from
    ``project_data_from_master(path)[project_title]``.
    """

    # The two dash characters that are treated as interchangeable when
    # comparing keys in :py:meth:`pull_keys`.
    _EN_DASH = "\N{EN DASH}"
    _HYPHEN_MINUS = "\N{HYPHEN-MINUS}"

    def __init__(self, d: dict) -> None:
        """Store ``d`` (a mapping of master key -> value) without copying it."""
        self._data = d

    def __len__(self) -> int:
        """Return the number of keys held for this project."""
        return len(self._data)

    def __getitem__(self, item):
        """Return the value stored under the exact key ``item``."""
        return self._data[item]

    def key_filter(self, key: str) -> List[Tuple]:
        """Return every ``(k, v)`` pair whose key contains ``key`` as a substring.

        Raises:
            KeyError: if no key contains ``key``.
        """
        data = [item for item in self._data.items() if key in item[0]]
        if not data:
            raise KeyError("Sorry, there is no matching data")
        return data

    @classmethod
    def _swap_en_dash(cls, key):
        """Return ``key`` with EN DASH replaced by HYPHEN-MINUS (non-strings pass through)."""
        if isinstance(key, str):
            return key.replace(cls._EN_DASH, cls._HYPHEN_MINUS)
        return key

    def pull_keys(self, input_iter: Iterable, flat=False) -> List[Tuple[Any, ...]]:
        """Return the entries whose key matches one of the keys in ``input_iter``.

        Keys are compared after replacing the troublesome EN DASH character
        with a plain HYPHEN-MINUS on *both* sides, so a key matches whichever
        dash was used in the master or in the request.  When ``flat`` is
        ``True`` the stored keys are additionally stripped of surrounding
        whitespace before comparison.

        Date-like string values are converted by
        ``_convert_str_date_to_object``.

        Args:
            input_iter: the wanted keys; any iterable (it is consumed once).
            flat: if exactly ``True``, return only the values instead of
                ``(key, value)`` tuples.

        Returns:
            A list ordered by the position at which each key first appears in
            ``input_iter``: ``(key, value)`` tuples (with the key as stored),
            or bare values when ``flat`` is ``True``.  A key listed several
            times in ``input_iter`` yields its entry that many times.
        """
        # Only the literal True selects flat mode, as before.
        strip_keys = flat is True

        # Materialise once: the keys are scanned repeatedly below, which would
        # silently exhaust a generator.
        wanted = [self._swap_en_dash(k) for k in input_iter]

        ranked = []
        for item in self._data.items():
            lookup = item[0]
            if strip_keys and isinstance(lookup, str):
                lookup = lookup.strip()
            lookup = self._swap_en_dash(lookup)

            hits = wanted.count(lookup)
            if not hits:
                continue
            converted = _convert_str_date_to_object(item)
            # One output entry per occurrence of the key in the request.
            ranked.extend([(wanted.index(lookup), converted)] * hits)

        # list.sort is stable, so entries sharing a position keep master order.
        ranked.sort(key=lambda entry: entry[0])
        ordered = [converted for _, converted in ranked]

        if strip_keys:
            return [pair[1] for pair in ordered]
        return ordered

    def __repr__(self):
        return f"ProjectData() - with data: {id(self._data)}"
def _convert_str_date_to_object(d_str: tuple) -> Tuple[str, Optional[datetime.date]]:
    """Convert the value of a ``(key, value)`` pair to a ``datetime.date`` if possible.

    The value is converted only when it matches ``DATE_REGEX_4``; it is then
    split on ``-`` and its first three parts are read as year, month and day.

    This is a best-effort conversion: if the value is not a string, does not
    match, or cannot be turned into a valid date, the pair is returned
    unchanged.

    Args:
        d_str: a ``(key, value)`` tuple.

    Returns:
        ``(key, datetime.date)`` on success, otherwise ``d_str`` itself.
    """
    try:
        if not re.match(DATE_REGEX_4, d_str[1]):
            return d_str
        parts = d_str[1].split('-')
        return (d_str[0], datetime.date(int(parts[0]), int(parts[1]), int(parts[2])))
    except (TypeError, ValueError, IndexError):
        # TypeError: value is not a string (re.match rejects it).
        # ValueError: a part is not an integer, or the date is out of range.
        # IndexError: fewer than three '-'-separated parts.
        return d_str
class Master:
    """A Master object, representing the main central data item in ``datamaps``.

    Args:
        quarter (:py:class:`bcompiler.api.Quarter`): created using ``Quarter(1, 2017)`` for example.
        path (str): path to the master xlsx file

    A master object is a composition between a :py:class:`datamaps.api.Quarter` object and an
    actual master xlsx file on disk.

    You create one, either by creating the Quarter object first, and using that as the first
    parameter of the ``Master`` constructor, e.g.::

        from bcompiler.api import Quarter
        from bcompiler.api import Master

        q1 = Quarter(1, 2016)
        m1 = Master(q1, '/tmp/master_1_2016.xlsx')

    or by doing both in one::

        m1 = Master(Quarter(1, 2016), '/tmp/master_1_2016.xlsx')

    Once you have a ``Master`` object, you can access project data from it, like this::

        project_data = m1['Project Title']

    The following *attributes* are available on `m1` once created as such, e.g.::

        data = m1.data
        quarter = m1.quarter
        filename = m1.filename

    ..etc
    """

    def __init__(self, quarter: Quarter, path: str) -> None:
        """Load the master at ``path`` and associate it with ``quarter``."""
        self._quarter = quarter
        self.path = path
        # The whole master is read eagerly, once, at construction time.
        self._data = project_data_from_master(self.path)
        self._project_titles = list(self._data.keys())
        self.year = self._quarter.year

    def __getitem__(self, project_name):
        """Return the data for ``project_name`` wrapped in a :py:class:`ProjectData`."""
        return ProjectData(self._data[project_name])

    @property
    def data(self):
        """Return all the data contained in the master in a large, nested dictionary.

        The resulting data structure contains a dictionary of :py:class:`collections.OrderedDict` items whose
        key is the name of a project::

            "Project Name": OrderedDict("key": "value"
                                        ...)

        This object can then be further interrogated, for example to obtain all key/values
        from a particular project, by doing::

            d = Master.data
            project_data = d['PROJECT_NAME']
        """
        return self._data

    @property
    def quarter(self):
        """Returns the ``Quarter`` object associated with the ``Master``.

        Example::

            q1 = m.quarter

        ``q1`` can then be further interrogated as documented in :py:class:`core.temporal.Quarter`.
        """
        return self._quarter

    @property
    def month(self):
        """Return a list holding the ``month`` attribute of each item in ``self.quarter.months``."""
        return [m.month for m in self.quarter.months]

    @property
    def filename(self):
        """The filename of the master xlsx file, e.g. ``master_1_2017.xlsx``."""
        return Path(self.path).name

    @property
    def projects(self):
        """A list of project titles derived from the master xlsx."""
        return self._project_titles

    def duplicate_keys(self, to_log=None):
        """Checks for duplicate keys in a master xlsx file.

        The keys are the cell values of the first column of the workbook's
        active sheet.

        Args:
            to_log (bool): Optional True or False, depending on whether you want to see duplicates
                reported in a ``WARNING`` log message. This is used mainly for internal purposes
                within ``bcompiler``.

        Returns:
            When ``to_log`` is truthy: ``True`` if duplicates were found (each one is logged as a
            warning), otherwise ``False``.
            When ``to_log`` is falsy: the set of duplicated keys, or ``False`` if there are none.
        """
        wb = load_workbook(self.path)
        ws = wb.active
        # Default to an empty column so a sheet that yields no columns is
        # reported as "no duplicates" instead of raising StopIteration.
        first_column = next(ws.iter_cols(), ())
        keys = [cell.value for cell in first_column]

        seen: set = set()
        dups: set = set()
        for key in keys:
            if key in seen:
                dups.add(key)
            else:
                seen.add(key)

        if to_log:
            if dups:
                for key in dups:
                    logger.warning(f"{self.path} contains duplicate key: \"{key}\". Masters cannot contain duplicate keys. Rename them.")
                return True
            logger.info(f"No duplicate keys in {self.path}")
            return False
        return dups if dups else False

    def __repr__(self):
        return f"Master({self.path}, {self.quarter.quarter}, {self.quarter.year})"
|