aboutsummaryrefslogtreecommitdiffstats
path: root/writing_tools/random_line.pl
diff options
context:
space:
mode:
author Matthew Lemon <matt@matthewlemon.com> 2022-09-13 19:38:17 +0100
committer Matthew Lemon <matt@matthewlemon.com> 2022-09-13 19:38:17 +0100
commit b3ce73c143be4f3a69815c370c61339ecb94f47d (patch)
tree 73e9e346834e72fe44dc2d5bd226a6e61f5ca2bf /writing_tools/random_line.pl
parent d84c321b74596ebeaed2ab504fd61d9e6899688e (diff)
finally we can get the title tag
Diffstat (limited to 'writing_tools/random_line.pl')
-rw-r--r-- writing_tools/random_line.pl 55
1 files changed, 52 insertions, 3 deletions
diff --git a/writing_tools/random_line.pl b/writing_tools/random_line.pl
index e85360f..a7f2eb8 100644
--- a/writing_tools/random_line.pl
+++ b/writing_tools/random_line.pl
@@ -2,17 +2,24 @@ use strict;
use warnings;
use English;
use Regexp::Common qw(URI);
+use LWP::UserAgent;
+use HTML::TreeBuilder 5 -weak;
+use HTML::HeadParser;
use feature qw(say);
# How to read each file in a directory $dir
my $numargs = $#ARGV + 1; # $#ARGV is the last index of @ARGV, so this is the argument count
-if ($numargs != 1) {
+sub usage { # prints the help text below, then ends the script
say "Pass a search term. All lines in the journal will be matched and URLs quoted will be extracted.";
exit;
}
+if ($numargs != 1) { # exactly one argument is accepted; anything else shows the help text
+ usage();
+}
+
my @targetlines;
my $searchterm = $ARGV[0]; # the single command-line argument
@@ -30,13 +37,55 @@ foreach my $f (glob("$dir/*.md")) {
}
close $fh or die "can't read close file '$f': $OS_ERROR";
}
+#
+# Let's interact with the World Wide Web!
+my $ua = LWP::UserAgent->new;
+$ua->agent("Mozilla/8.0");
foreach my $line (@targetlines) {
# if ($line =~ /(http.*$)/) {
if ($line =~ m/$RE{URI}{HTTP}{-scheme => qr<https?>}{-keep}/) {
my$t = $1;
$t =~ s/\.$//; # remove the fullstop if it has one at the end
- print "Saving: $t\n";
- push @urls => $t
+ # Fetch the matched URL and print the page's <title>; failed fetches are reported on STDERR and skipped.
+ my $req = HTTP::Request->new(GET => $t);
+ $req->header(Accept => "text/html");
+ my $res = $ua->request($req);
+ if (!$res->is_success) { # do not parse an error page as if it were the target document
+ warn "Could not fetch $t: ", $res->status_line, "\n";
+ next; # move on to the next matched line
+ }
+ my $p = HTML::HeadParser->new;
+ $p->parse($res->content); # return value ignored: false only means the end of <head> was reached
+ say $p->header('Title') // "(no title found: $t)"; # placeholder avoids printing undef
}
}
+
+
+# foreach my $url (@urls) {
+# print $url;
+# my $req = HTTP::Request->new(GET => $url);
+# $req->header(Accept => "text/html");
+# my $res = $ua->request($req);
+
+# my $root = HTML::TreeBuilder->new_from_content($req->content);
+
+# print $root;
+# # my @elements = $root->look_down(_tag => "title");
+# # foreach my $thing (@elements) {
+# # print $thing->as_text, "\n";
+# # }
+# }
+
+
+
+
+
+
+# if ($res->is_success) {
+# $tree->parse($res->as_string);
+# }
+# else {
+# print $res->status_line, "\n";
+# }
+