working better - lists unrecognised transaction descriptors

author: Matthew Lemon <matt@matthewlemon.com> 2021-06-09 21:11:31 +0100
committer: Matthew Lemon <matt@matthewlemon.com> 2021-06-09 21:11:31 +0100
commit: 89b003f133926d9c728b78f6c825904b92333f02 (patch)
tree: baf635cb9d38d89ea0b4979f7cd6639e5607109d /ledgerscripts/csv_processor
parent: e9fe1134f90bec4b3dc8b130dfcb47585d10670d (diff)
1 files changed, 34 insertions, 4 deletions
diff --git a/ledgerscripts/csv_processor b/ledgerscripts/csv_processor
index bfd7fc9..d9ebd2c 100644
--- a/ledgerscripts/csv_processor
+++ b/ledgerscripts/csv_processor
@@ -17,6 +17,17 @@ my $csv = Text::CSV->new(
 my %transaction;
 my @jlist;    # used to create the categories json fil0
 my $cat_json;
+my @uncategorised;
+
+# Return the distinct values of a list, keeping each value's first
+# occurrence in its original position. %seen counts how often a value
+# has been met, so grep only lets a value through the first time.
+# Unlike taking keys() of a temporary hash, the result order is stable
+# from run to run. In scalar context it gives the number of distinct values.
+sub uniq {
+        my %seen;
+        return grep { !$seen{$_}++ } @_;
+}
 
 my $file = $ARGV[0] or die "Need to get CSV file on the command line\n";
 open( my $csvdata, '<:encoding(UTF-8)', $file )
@@ -27,6 +38,7 @@ open( my $csvdata, '<:encoding(UTF-8)', $file )
         or die "Could not open category file $!\n";
     local $/ = undef;    # slurp mode!
     $cat_json = <$category_file>;
+    close( $category_file );
 }
 
 my $catref = decode_json $cat_json;
@@ -36,6 +48,7 @@ my $cats   = $catref->{"data"};
 
 my @descs = map $_->{"desc"}, @{$cats};
 
+# return the category of the matching entry in $cats, or "UNKNOWN -> <desc>" if none matches
 sub get_category_from_desc {
     my $desc = shift;
     for my $hsh ( @{$cats} ) {
@@ -43,6 +56,7 @@ sub get_category_from_desc {
             return $hsh->{"category"};
         }
     }
+    return "UNKNOWN -> $desc";
 }
 
 while ( my $line = <$csvdata> ) {
@@ -50,34 +64,45 @@ while ( my $line = <$csvdata> ) {
     chomp $line;
     if ( $csv->parse($line) ) {
         my @fields = $csv->fields();
+
+        # parse the date
+        # everything ends up in the transaction hash
         $transaction{day}   = substr $fields[0], 0, 2;
         $transaction{month} = substr $fields[0], 3, 2;
         $transaction{year}  = substr $fields[0], 6, 4;
         $transaction{date}  = $fields[0];
+
+        # remove extraneous spaces from description
         $fields[1] =~ s/\s+/ /g;
 
         # used to create the categories json file - see below
         push @jlist, { "desc" => $fields[1], "category" => "NONE" };
 
+        # add the description and cost
         $transaction{desc} = $fields[1];
         $transaction{cost} = $fields[2];
 
-        for my $d (@descs) {
-            if ( $transaction{desc} eq $d ) {
-                $transaction{exp_type} = get_category_from_desc $d;
-            }
+        my $c = get_category_from_desc $transaction{desc};
+
+        if ( $c =~ /^UNKNOWN ->.*$/ ) {
+            push @uncategorised, $transaction{desc};
         }
+        else { $transaction{exp_type} = $c }
 
+        # parse the transaction type (currently unused in the ledger journal)
         if ( $fields[1] =~ /^.+(VIS|DR|DD|TFR|CR|SO|ATM|\)\)\))$/ ) {
             $transaction{type} = $1;
         }
         else { die("CANNOT DETERMINE TYPE!\n") }
 
+        # if the cost is negative, it is an expense category
         if ( $fields[2] =~ /^\-/ ) {
             $transaction{expense} = 1;
         }
         else { $transaction{expense} = 0; }
 
+        # write out the three line block representing the transaction
+        # in the ledger journal file
         print join "",
             (
             $transaction{year}, "/", $transaction{month}, "/",
@@ -85,6 +110,7 @@ while ( my $line = <$csvdata> ) {
             $transaction{desc}
             ),
             "\n";
+
         if ( $transaction{expense} == 1 ) {
             ( my $cost = $transaction{cost} ) =~ s/^\-//;
             print qq(\t$transaction{exp_type}\t$cost\n);
@@ -100,6 +126,10 @@ while ( my $line = <$csvdata> ) {
     else { warn "Line could not be parsed: $line\n"; }
 }
 
+say "Unrecognized payees that need to be added to categories.json:";
+
+for (uniq @uncategorised) { say "* $_" };
+
 # The following code is used to output a JSON file
 # to be used for categories. Uncomment for use.
 # my $data = encode_json {data => \@jlist};
author	Matthew Lemon <matt@matthewlemon.com>	2021-06-09 21:11:31 +0100
committer	Matthew Lemon <matt@matthewlemon.com>	2021-06-09 21:11:31 +0100
commit	89b003f133926d9c728b78f6c825904b92333f02 (patch)
tree	baf635cb9d38d89ea0b4979f7cd6639e5607109d /ledgerscripts/csv_processor
parent	e9fe1134f90bec4b3dc8b130dfcb47585d10670d (diff)