diff options
author | Matthew Lemon <matt@matthewlemon.com> | 2022-09-13 20:47:22 +0100 |
---|---|---|
committer | Matthew Lemon <matt@matthewlemon.com> | 2022-09-13 20:47:22 +0100 |
commit | b31e80034ec923161b91196e169e7e4c261bd4de (patch) | |
tree | e171b485d5d629d10a5eee66009c00b0437b904b /writing_tools/random_line.pl | |
parent | 18588d5bbbb628c7aff04cbfb8b8ad5967c9bd7d (diff) |
clean up a bit and rename
Diffstat (limited to 'writing_tools/random_line.pl')
-rw-r--r-- | writing_tools/random_line.pl | 119 |
1 files changed, 0 insertions, 119 deletions
diff --git a/writing_tools/random_line.pl b/writing_tools/random_line.pl deleted file mode 100644 index 4723f0d..0000000 --- a/writing_tools/random_line.pl +++ /dev/null @@ -1,119 +0,0 @@ -use strict; -use warnings; -use English; -use Regexp::Common qw(URI); -use LWP::UserAgent; -use HTML::TreeBuilder 5 -weak; -use HTML::HeadParser; -use feature qw(say); - -# How to read each file in a directory $dir - -my $numargs = $#ARGV + 1; - -sub usage { - say "Pass a search term. All lines in the journal will be matched and URLs quoted will be extracted."; - exit; -} - -if ($numargs != 1) { - usage(); -} - - -my @targetlines; -my $searchterm = $ARGV[0]; -my @urls; - -my $dir = '/home/lemon/Notes/journal'; -foreach my $f (glob("$dir/*.md")) { - # printf "%s\n", $f; - open my $fh, "<", $f or die "Cannot open that file '$f': $OS_ERROR"; - while (<$fh>) { - if ($_ =~ m/$searchterm/) { - # printf " %s", $_; - push @targetlines, $_; - } - } - close $fh or die "can't read close file '$f': $OS_ERROR"; -} - -sub striptime { - my $url = shift; - $url =~ s/\?t=\d*//; - return $url; -} - -# -# Let's interact with the World Wide Web! -my $ua = LWP::UserAgent->new; -$ua->agent("Mozilla/8.0"); - -foreach my $line (@targetlines) { - # if ($line =~ /(http.*$)/) { - if ($line =~ m/$RE{URI}{HTTP}{-scheme => qr<https?>}{-keep}/) { - my$t = $1; - $t =~ s/\.$//; # remove the fullstop if it has one at the end - push @urls => striptime($t) - # my $req = HTTP::Request->new(GET => $t); - # $req->header(Accept => "text/html"); - # my $res = $ua->request($req); - # my $p = HTML::HeadParser->new; - # $p->parse($res->content) and print "not finished"; - # print $p->header('Title'), "\n"; - # my $root = HTML::TreeBuilder->new_from_content($res->content); - # my $title = $root->look_down('_tag' => 'title'); - # my $value = $title->attr('value'); - } -} - -# get rid of duplicates from array or urls -# see perlfaq4 -my %riddups = map { $_, 1 } @urls; -my @uniqueurls = keys %riddups; - -sub create_mdlink { - my ($url, $title) = @_; - return "[".$title."]"."(".$url.")" - -} - -foreach my $url (@uniqueurls) { - my $req = HTTP::Request->new(GET => $url); - $req->header(Accept => "text/html"); - my $res = $ua->request($req); - my $p = HTML::HeadParser->new; - $p->parse($res->content) and print "not finished"; - my $title = $p->header('Title'); - print create_mdlink($url, $title), "\n"; -} - - - -# foreach my $url (@urls) { -# print $url; -# my $req = HTTP::Request->new(GET => $url); -# $req->header(Accept => "text/html"); -# my $res = $ua->request($req); - -# my $root = HTML::TreeBuilder->new_from_content($req->content); - -# print $root; -# # my @elements = $root->look_down(_tag => "title"); -# # foreach my $thing (@elements) { -# # print $thing->as_text, "\n"; -# # } -# } - - - - - - -# if ($res->is_success) { -# $tree->parse($res->as_string); -# } -# else { -# print $res->status_line, "\n"; -# } - |