aboutsummaryrefslogtreecommitdiff
path: root/datamaps/tests/test_cleanser.py
blob: 344a9a3c8c6994705a159c15a4ac3f6089cac1ad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import datetime
from ..process.cleansers import Cleanser


def test_cleaning_dot_date():
    """Dot-separated date strings clean to ``datetime.date(2072, 1, 25)``.

    Covers a single-digit month, a zero-padded month with a two-digit
    year, and a zero-padded month with a four-digit year.
    """
    expected = datetime.date(2072, 1, 25)
    raw_values = ("25.1.72", "25.01.72", "25.01.2072")
    # Build every Cleanser up front, then clean them in the same order.
    cleansers = [Cleanser(raw) for raw in raw_values]
    for cleanser in cleansers:
        assert cleanser.clean() == expected


def test_cleaning_slash_date():
    """Slash-separated date strings clean to ``datetime.date(2072, 1, 25)``.

    Covers a single-digit month, a zero-padded month with a two-digit
    year, and a zero-padded month with a four-digit year.
    """
    expected = datetime.date(2072, 1, 25)
    raw_values = ("25/1/72", "25/01/72", "25/01/2072")
    # Build every Cleanser up front, then clean them in the same order.
    cleansers = [Cleanser(raw) for raw in raw_values]
    for cleanser in cleansers:
        assert cleanser.clean() == expected


def test_em_dash_key():
    """A typographic dash in a key is cleaned to a plain ASCII hyphen."""
    # NOTE(review): the dash in this literal appears to be U+2013 (en dash)
    # rather than an em dash, despite the test name - confirm which is meant.
    raw_key = 'Pre 14-15 BL – Income both Revenue and Capital'
    cleaned = Cleanser(raw_key).clean()
    assert cleaned == 'Pre 14-15 BL - Income both Revenue and Capital'


def test_double_trailing_space():
    """Trailing spaces (two, then one) are removed from the cleaned value."""
    expected = 'Pre 14-15 BL - Incoming both Revenue and Capital'
    padded_inputs = (
        'Pre 14-15 BL - Incoming both Revenue and Capital  ',  # two spaces
        'Pre 14-15 BL - Incoming both Revenue and Capital ',  # one space
    )
    for padded in padded_inputs:
        assert Cleanser(padded).clean() == expected