aboutsummaryrefslogtreecommitdiffstats
path: root/ledgerscripts/csv_processor
diff options
context:
space:
mode:
authorMatthew Lemon <matt@matthewlemon.com>2021-06-09 21:11:31 +0100
committerMatthew Lemon <matt@matthewlemon.com>2021-06-09 21:11:31 +0100
commit89b003f133926d9c728b78f6c825904b92333f02 (patch)
treebaf635cb9d38d89ea0b4979f7cd6639e5607109d /ledgerscripts/csv_processor
parente9fe1134f90bec4b3dc8b130dfcb47585d10670d (diff)
working better - lists unrecognised transaction descriptors
Diffstat (limited to 'ledgerscripts/csv_processor')
-rw-r--r--ledgerscripts/csv_processor38
1 files changed, 34 insertions, 4 deletions
diff --git a/ledgerscripts/csv_processor b/ledgerscripts/csv_processor
index bfd7fc9..d9ebd2c 100644
--- a/ledgerscripts/csv_processor
+++ b/ledgerscripts/csv_processor
@@ -17,6 +17,17 @@ my $csv = Text::CSV->new(
my %transaction;
my @jlist; # used to create the categories json file
my $cat_json;
+my @uncategorised;
+
+# given a list, make it unique
+# this works because a hash cannot have duplicate keys
+# so you can keep adding keys to a temp_hash with an
+# arbitrary value (in this case 0), then extract the keys
+# from the hash at the end - they will all have to be unique
+sub uniq {
+ my %temp_hash = map { $_, 0 } @_;
+ return keys %temp_hash;
+}
my $file = $ARGV[0] or die "Need to get CSV file on the command line\n";
open( my $csvdata, '<:encoding(UTF-8)', $file )
@@ -27,6 +38,7 @@ open( my $csvdata, '<:encoding(UTF-8)', $file )
or die "Could not open category file $!\n";
local $/ = undef; # slurp mode!
$cat_json = <$category_file>;
+ close( $category_file );
}
my $catref = decode_json $cat_json;
@@ -36,6 +48,7 @@ my $cats = $catref->{"data"};
my @descs = map $_->{"desc"}, @{$cats};
+# look up a description in $cats and return its category, or "UNKNOWN -> <desc>" if no entry matches
sub get_category_from_desc {
my $desc = shift;
for my $hsh ( @{$cats} ) {
@@ -43,6 +56,7 @@ sub get_category_from_desc {
return $hsh->{"category"};
}
}
+ return "UNKNOWN -> $desc";
}
while ( my $line = <$csvdata> ) {
@@ -50,34 +64,45 @@ while ( my $line = <$csvdata> ) {
chomp $line;
if ( $csv->parse($line) ) {
my @fields = $csv->fields();
+
+ # parse the date
+ # everything ends up in the transaction hash
$transaction{day} = substr $fields[0], 0, 2;
$transaction{month} = substr $fields[0], 3, 2;
$transaction{year} = substr $fields[0], 6, 4;
$transaction{date} = $fields[0];
+
+ # remove extraneous spaces from description
$fields[1] =~ s/\s+/ /g;
# used to create the categories json file - see below
push @jlist, { "desc" => $fields[1], "category" => "NONE" };
+ # add the description and cost
$transaction{desc} = $fields[1];
$transaction{cost} = $fields[2];
- for my $d (@descs) {
- if ( $transaction{desc} eq $d ) {
- $transaction{exp_type} = get_category_from_desc $d;
- }
+ my $c = get_category_from_desc $transaction{desc};
+
+ if ( $c =~ /^UNKNOWN ->.*$/ ) {
+ push @uncategorised, $transaction{desc};
}
+ else { $transaction{exp_type} = $c }
+ # parse the transaction type (currently unused in the ledger journal)
if ( $fields[1] =~ /^.+(VIS|DR|DD|TFR|CR|SO|ATM|\)\)\))$/ ) {
$transaction{type} = $1;
}
else { die("CANNOT DETERMINE TYPE!\n") }
+ # if the cost is negative, it is an expense category
if ( $fields[2] =~ /^\-/ ) {
$transaction{expense} = 1;
}
else { $transaction{expense} = 0; }
+ # write out the three line block representing the transaction
+ # in the ledger journal file
print join "",
(
$transaction{year}, "/", $transaction{month}, "/",
@@ -85,6 +110,7 @@ while ( my $line = <$csvdata> ) {
$transaction{desc}
),
"\n";
+
if ( $transaction{expense} == 1 ) {
( my $cost = $transaction{cost} ) =~ s/^\-//;
print qq(\t$transaction{exp_type}\t$cost\n);
@@ -100,6 +126,10 @@ while ( my $line = <$csvdata> ) {
else { warn "Line could not be parsed: $line\n"; }
}
+say "Unrecognized payees that need to be added to categories.json:";
+
+for (uniq @uncategorised) { say "* $_" };
+
# The following code is used to output a JSON file
# to be used for categories. Uncomment for use.
# my $data = encode_json {data => \@jlist};