diff options
author | Matthew Lemon <matt@matthewlemon.com> | 2022-09-13 19:38:17 +0100 |
---|---|---|
committer | Matthew Lemon <matt@matthewlemon.com> | 2022-09-13 19:38:17 +0100 |
commit | b3ce73c143be4f3a69815c370c61339ecb94f47d (patch) | |
tree | 73e9e346834e72fe44dc2d5bd226a6e61f5ca2bf /writing_tools/random_line.pl | |
parent | d84c321b74596ebeaed2ab504fd61d9e6899688e (diff) |
finally we can get the title tag
Diffstat (limited to 'writing_tools/random_line.pl')
-rw-r--r-- | writing_tools/random_line.pl | 55 |
1 files changed, 52 insertions, 3 deletions
```diff
diff --git a/writing_tools/random_line.pl b/writing_tools/random_line.pl
index e85360f..a7f2eb8 100644
--- a/writing_tools/random_line.pl
+++ b/writing_tools/random_line.pl
@@ -2,17 +2,24 @@ use strict;
 use warnings;
 use English;
 use Regexp::Common qw(URI);
+use LWP::UserAgent;
+use HTML::TreeBuilder 5 -weak;
+use HTML::HeadParser;
 use feature qw(say);
 
 # How to read each file in a directory $dir
 
 my $numargs = $#ARGV + 1;
 
-if ($numargs != 1) {
+sub usage {
     say "Pass a search term. All lines in the journal will be matched and URLs quoted will be extracted.";
     exit;
 }
 
+if ($numargs != 1) {
+    usage();
+}
+
 my @targetlines;
 my $searchterm = $ARGV[0];
 
@@ -30,13 +37,55 @@ foreach my $f (glob("$dir/*.md")) {
     }
     close $fh or die "can't read close file '$f': $OS_ERROR";
 }
+#
+# Let's interact with the World Wide Web!
+my $ua = LWP::UserAgent->new;
+$ua->agent("Mozilla/8.0");
 
 foreach my $line (@targetlines) {
     # if ($line =~ /(http.*$)/) {
     if ($line =~ m/$RE{URI}{HTTP}{-scheme => qr<https?>}{-keep}/) {
         my$t = $1;
         $t =~ s/\.$//; # remove the fullstop if it has one at the end
-        print "Saving: $t\n";
-        push @urls => $t
+        # print "Saving: $t\n";
+        # push @urls => $t
+        my $req = HTTP::Request->new(GET => $t);
+        $req->header(Accept => "text/html");
+        my $res = $ua->request($req);
+        my $p = HTML::HeadParser->new;
+        $p->parse($res->content) and print "not finished";
+        print $p->header('Title'), "\n";
+        # my $root = HTML::TreeBuilder->new_from_content($res->content);
+        # my $title = $root->look_down('_tag' => 'title');
+        # my $value = $title->attr('value');
     }
 }
+
+
+# foreach my $url (@urls) {
+#     print $url;
+#     my $req = HTTP::Request->new(GET => $url);
+#     $req->header(Accept => "text/html");
+#     my $res = $ua->request($req);
+
+#     my $root = HTML::TreeBuilder->new_from_content($req->content);
+
+#     print $root;
+#     # my @elements = $root->look_down(_tag => "title");
+#     # foreach my $thing (@elements) {
+#     #     print $thing->as_text, "\n";
+#     # }
+# }
+
+
+
+
+
+
+# if ($res->is_success) {
+#     $tree->parse($res->as_string);
+# }
+# else {
+#     print $res->status_line, "\n";
+# }
+
```