about summary refs log tree commit diff stats
path: root/writing_tools/random_line.pl
diff options
context:
space:
mode:
author Matthew Lemon <matt@matthewlemon.com> 2022-09-13 19:59:24 +0100
committer Matthew Lemon <matt@matthewlemon.com> 2022-09-13 19:59:24 +0100
commit bcac8134ea12369d6cf8ba7068ec78fd49a47223 (patch)
tree bfb56b1260df902ad532b4e2d409e3513719c097 /writing_tools/random_line.pl
parent b3ce73c143be4f3a69815c370c61339ecb94f47d (diff)
better processing of urls
Diffstat (limited to 'writing_tools/random_line.pl')
-rw-r--r-- writing_tools/random_line.pl 35
1 files changed, 27 insertions, 8 deletions
diff --git a/writing_tools/random_line.pl b/writing_tools/random_line.pl
index a7f2eb8..fe19ab7 100644
--- a/writing_tools/random_line.pl
+++ b/writing_tools/random_line.pl
@@ -37,6 +37,13 @@ foreach my $f (glob("$dir/*.md")) {
}
close $fh or die "can't read close file '$f': $OS_ERROR";
}
+
+sub striptime { # Return a copy of the given URL with the first ?t=DIGITS fragment removed.
+ my $url = shift; # first argument: the URL string to clean
+ $url =~ s/\?t=\d*//; # no /g, so only the first match is cut; the digits are optional. NOTE(review): any &key=value that followed is left behind without its question mark - confirm inputs never carry further parameters
+ return $url; # the cleaned copy; the substitution ran on the local variable only
+}
+
#
# Let's interact with the World Wide Web!
my $ua = LWP::UserAgent->new;
@@ -47,14 +54,13 @@ foreach my $line (@targetlines) {
if ($line =~ m/$RE{URI}{HTTP}{-scheme => qr<https?>}{-keep}/) {
my$t = $1;
$t =~ s/\.$//; # remove the fullstop if it has one at the end
- # print "Saving: $t\n";
- # push @urls => $t
- my $req = HTTP::Request->new(GET => $t);
- $req->header(Accept => "text/html");
- my $res = $ua->request($req);
- my $p = HTML::HeadParser->new;
- $p->parse($res->content) and print "not finished";
- print $p->header('Title'), "\n";
+ push @urls => striptime($t)
+ # my $req = HTTP::Request->new(GET => $t);
+ # $req->header(Accept => "text/html");
+ # my $res = $ua->request($req);
+ # my $p = HTML::HeadParser->new;
+ # $p->parse($res->content) and print "not finished";
+ # print $p->header('Title'), "\n";
# my $root = HTML::TreeBuilder->new_from_content($res->content);
# my $title = $root->look_down('_tag' => 'title');
# my $value = $title->attr('value');
@@ -62,6 +68,19 @@ foreach my $line (@targetlines) {
}
+foreach my $url (@urls) { # For each collected URL: print it, fetch it through $ua, then print the title found in the page head.
+ print "URL: $url\n";
+ my $req = HTTP::Request->new(GET => $url);
+ $req->header(Accept => "text/html"); # ask the server for HTML
+ my $res = $ua->request($req); # NOTE(review): the response status is never checked, so an error body is parsed like a normal page
+ my $p = HTML::HeadParser->new;
+ $p->parse($res->content) and print "not finished"; # presumably parse() stays true while the head section has not ended within this content - confirm against the HTML::HeadParser docs
+ print "TITLE:", $p->header('Title'), "\n"; # NOTE(review): header() may return undef when the page has no title, which would warn here - confirm
+ print "\n"; # blank line between entries
+}
+
+
+
# foreach my $url (@urls) {
# print $url;
# my $req = HTTP::Request->new(GET => $url);