diff options
Diffstat (limited to 'journal')
-rw-r--r-- | journal/create_md_links_from_journal_urls.pl | 100 | ||||
-rw-r--r-- | journal/get_journal_tags.pl | 17 | ||||
-rwxr-xr-x | journal/pull_url_from_journal.pl | 14 |
3 files changed, 131 insertions, 0 deletions
diff --git a/journal/create_md_links_from_journal_urls.pl b/journal/create_md_links_from_journal_urls.pl new file mode 100644 index 0000000..c1801f5 --- /dev/null +++ b/journal/create_md_links_from_journal_urls.pl @@ -0,0 +1,100 @@ +use strict; +use warnings; +no warnings 'utf8'; # prevent the wide character warning you get on STDOUT +use English; +use Regexp::Common qw(URI); +use LWP::UserAgent; +use HTML::TreeBuilder 5 -weak; +use HTML::HeadParser; +use feature qw(say); + +# $| = 1; # turn on autoflush for stdout (https://stackoverflow.com/questions/40608986/print-doesnt-work-while-iterations-are-going-inside-foreach-loop) + +# How to read each file in a directory $dir + +my $numargs = $#ARGV + 1; + +sub usage { + say "Pass a search term or -all. All lines in the journal will be matched and URLs quoted will be extracted, or -all will do all URLs.."; + exit; +} + +if ($numargs != 1) { + usage(); +} + + +my @targetlines; +my $searchterm = $ARGV[0]; + +my @urls; + +my $dir = '/home/lemon/Notes/journal'; + +foreach my $f (glob("$dir/*.md")) { + # printf "%s\n", $f; + open my $fh, "<", $f or die "Cannot open that file '$f': $OS_ERROR"; + while (<$fh>) { + if ($searchterm eq "-all") { + push @targetlines, $_; + } + else { + if ($_ =~ m/$searchterm/) { + # printf " %s", $_; + push @targetlines, $_; + } + } + } + close $fh or die "can't read close file '$f': $OS_ERROR"; +} + +sub striptime { + my $url = shift; + $url =~ s/\?t=\d*//; + return $url; +} + +# +# Let's interact with the World Wide Web! +my $ua = LWP::UserAgent->new; +$ua->agent("Mozilla/8.0"); + +foreach my $line (@targetlines) { + # if ($line =~ /(http.*$)/) { + if ($line =~ m/$RE{URI}{HTTP}{-scheme => qr<https?>}{-keep}/) { + my$t = $1; + $t =~ s/\.$//; # remove the fullstop if it has one at the end + push @urls => striptime($t) + } +} + +# get rid of duplicates from array or urls +# see perlfaq4 +my %riddups = map { $_, 1 } @urls; +my @uniqueurls = keys %riddups; + +sub create_mdlink { + my ($url, $title) = @_; + if ($title eq "") { + $title = "- UKNOWN TITLE -"; + } + + return "[".$title."]"."(".$url.")" + +} + +my @mdurls; + +foreach my $url (@uniqueurls) { + my $req = HTTP::Request->new(GET => $url); + $req->header(Accept => "text/html"); + my $res = $ua->request($req); + my $p = HTML::HeadParser->new; + $p->parse($res->content) and print "not finished"; + my $title = $p->header('Title'); + push @mdurls => create_mdlink($url, $title); +} + +$, = "\n\n- "; +print "\n\n"; +print @mdurls; diff --git a/journal/get_journal_tags.pl b/journal/get_journal_tags.pl new file mode 100644 index 0000000..2dbd2e7 --- /dev/null +++ b/journal/get_journal_tags.pl @@ -0,0 +1,17 @@ +use strict; +use warnings; + +my @tags; + +foreach my $fn (glob "~/Notes/journal/*.md") { + open my $FH, "<", $fn or die "Cannot open $fn"; + while (<$FH>) { + if (/(\s:[A-Z]\w+)/) { + push @tags, $1; + } + + } +} +my %truetags = map { $_, 1 } @tags; +my @trutags = keys %truetags; +print @trutags, "\n"; diff --git a/journal/pull_url_from_journal.pl b/journal/pull_url_from_journal.pl new file mode 100755 index 0000000..66941cf --- /dev/null +++ b/journal/pull_url_from_journal.pl @@ -0,0 +1,14 @@ +#!/usr/bin/env perl + +use warnings; +use strict; + +my $journal = "/home/lemon/Notes/journal"; + +my $line = `grep -R $ARGV[0] $journal | cut -f3- -d' '| fzf `; +chomp $line; + +if ($line =~ m/(http.*)/) { + print $1; + # system('qutebrowser', $1); +} |