diff options
author | Matthew Lemon <matt@matthewlemon.com> | 2021-06-09 21:11:31 +0100 |
---|---|---|
committer | Matthew Lemon <matt@matthewlemon.com> | 2021-06-09 21:11:31 +0100 |
commit | 89b003f133926d9c728b78f6c825904b92333f02 (patch) | |
tree | baf635cb9d38d89ea0b4979f7cd6639e5607109d /ledgerscripts/csv_processor | |
parent | e9fe1134f90bec4b3dc8b130dfcb47585d10670d (diff) |
working better - lists unrecognised transaction descriptors
Diffstat (limited to 'ledgerscripts/csv_processor')
-rw-r--r-- | ledgerscripts/csv_processor | 38 |
1 files changed, 34 insertions, 4 deletions
diff --git a/ledgerscripts/csv_processor b/ledgerscripts/csv_processor index bfd7fc9..d9ebd2c 100644 --- a/ledgerscripts/csv_processor +++ b/ledgerscripts/csv_processor @@ -17,6 +17,17 @@ my $csv = Text::CSV->new( my %transaction; my @jlist; # used to create the categories json file my $cat_json; +my @uncategorised; + +# given a list, make it unique +# this works because a hash cannot have duplicate keys +# so you can keep adding keys to a temp_hash with an +# arbitrary value (in this case 0), then extract the keys +# from the hash at the end - they will all have to be unique +sub uniq { + my %temp_hash = map { $_, 0 } @_; + return keys %temp_hash; +} my $file = $ARGV[0] or die "Need to get CSV file on the command line\n"; open( my $csvdata, '<:encoding(UTF-8)', $file ) @@ -27,6 +38,7 @@ open( my $csvdata, '<:encoding(UTF-8)', $file ) or die "Could not open category file $!\n"; local $/ = undef; # slurp mode! $cat_json = <$category_file>; + close( $category_file ); } my $catref = decode_json $cat_json; @@ -36,6 +48,7 @@ my $cats = $catref->{"data"}; my @descs = map $_->{"desc"}, @{$cats}; +# self-explanatory sub get_category_from_desc { my $desc = shift; for my $hsh ( @{$cats} ) { @@ -43,6 +56,7 @@ sub get_category_from_desc { return $hsh->{"category"}; } } + return "UNKNOWN -> $desc"; } while ( my $line = <$csvdata> ) { @@ -50,34 +64,45 @@ while ( my $line = <$csvdata> ) { chomp $line; if ( $csv->parse($line) ) { my @fields = $csv->fields(); + + # parse the date + # everything ends up in the transaction hash $transaction{day} = substr $fields[0], 0, 2; $transaction{month} = substr $fields[0], 3, 2; $transaction{year} = substr $fields[0], 6, 4; $transaction{date} = $fields[0]; + + # remove extraneous spaces from description $fields[1] =~ s/\s+/ /g; # used to create the categories json file - see below push @jlist, { "desc" => $fields[1], "category" => "NONE" }; + # add the description and cost $transaction{desc} = $fields[1]; $transaction{cost} = $fields[2]; - for 
my $d (@descs) { - if ( $transaction{desc} eq $d ) { - $transaction{exp_type} = get_category_from_desc $d; - } + my $c = get_category_from_desc $transaction{desc}; + + if ( $c =~ /^UNKNOWN ->.*$/ ) { + push @uncategorised, $transaction{desc}; } + else { $transaction{exp_type} = $c } + # parse the transaction type. Unused in ledger journal at moment if ( $fields[1] =~ /^.+(VIS|DR|DD|TFR|CR|SO|ATM|\)\)\))$/ ) { $transaction{type} = $1; } else { die("CANNOT DETERMINE TYPE!\n") } + # if the cost is negative, it is an expense category if ( $fields[2] =~ /^\-/ ) { $transaction{expense} = 1; } else { $transaction{expense} = 0; } + # write out the three line block representing the transaction + # in the ledger journal file print join "", ( $transaction{year}, "/", $transaction{month}, "/", @@ -85,6 +110,7 @@ while ( my $line = <$csvdata> ) { $transaction{desc} ), "\n"; + if ( $transaction{expense} == 1 ) { ( my $cost = $transaction{cost} ) =~ s/^\-//; print qq(\t$transaction{exp_type}\t$cost\n); @@ -100,6 +126,10 @@ while ( my $line = <$csvdata> ) { else { warn "Line could not be parsed: $line\n"; } } +say "Unrecognized payees that need to be added to categories.json:"; + +for (uniq @uncategorised) { say "* $_" }; + # The following code is used to output a JSON file # to be used for categories. Uncomment for use. # my $data = encode_json {data => \@jlist}; |