#!/usr/bin/perl -w use strict; # Creating a weekly archive of http://livejournal.com/users/tiphareth/friends/ # Written by Misha Verbitsky under GPL (11 07 03) # # I used parts of ljsm.pl (CVS revision 1.51) by # Alexander Nikolaev use constant UTF8_DECODE => 1; # convert text to local charset use constant LOCAL_CHARSET => 'KOI8-R'; use constant DEBUG_LEVEL => 2; # 0 - quiet, 1 - essential, 2 - verbose use constant SLEEP => 30; # sleep 30 seconds if download is impossible use constant BASE_URL => 'http://www.livejournal.com/'; my $INDEX_URL = "http://www.livejournal.com/users/tiphareth/friends?skip="; my $LOCAL_DIR = "/www/imperium.lenin.ru/html/LENTA/"; umask 0022; use LWP::UserAgent; use HTML::SimpleLinkExtor; use File::Path; use Unicode::MapUTF8 qw(from_utf8); my $ua = new LWP::UserAgent; my %images = (); # Making dir for saving, my $date = localtime; my @date = split(/ +/,$date); my $day ="$date[2]-$date[1]-$date[4]"; my $savedir = $LOCAL_DIR . $day; mkpath($savedir, DEBUG_LEVEL, 0755); for (0..39) { my $ind=25*$_; get_index ($ind); } sub get_index { my $num_index= $_[0]; my $fname="index"."$num_index".".html"; my $index = get_page_nofail ($INDEX_URL . "$num_index"); %images = (); my $result =""; foreach (split(/\n/, $index)) { $_ = from_utf8({-string => $_, -charset => LOCAL_CHARSET}) if (UTF8_DECODE); $result .= "$_\n"; map { $images{$_} = 1} grep(/src=.http:\/\/[a-z]*\.livejournal.com/, split(/\s+/, $_)); } logmsg("processing pictures and relative links...\n", 4); $result =~ s/src=(.)http:\/\/[a-z]*\.livejournal.com/src=$1\.\.\/IMG/sg; # replace all relative links with absolute ones $result =~ s/|


Advertisement on IMPERIUM.LENIN.RU:
<\/body>|sg; get_pics(); logmsg(">> $savedir/$fname\n",2); open DF,">$savedir/$fname" or die "error opening $savedir/$fname for writing: $!\n"; print DF $result; close DF; } sub get_page { my ($url) = @_; logmsg("<< $url\n",2); my $req = new HTTP::Request GET => $url; #send request my $res = $ua->request($req); #process response if ($res->is_success) { return wantarray? ($res->content, $res->content_type) : $res->content; } else { logmsg("$url\:\n" . $res->error_as_HTML() . "\n", 0); return undef; } } sub get_page_nofail { my ($url) = @_; POVTOR: logmsg("<< $url\n",2); my $req = new HTTP::Request GET => $url; #send request my $res = $ua->request($req); #process response if ($res->is_success) { return wantarray? ($res->content, $res->content_type) : $res->content; } else { logmsg("$url\:\n" . $res->error_as_HTML() . "\n", 0); sleep SLEEP; goto POVTOR; } } sub get_pics { my ($imgsrc, $img, $target); logmsg("getting pictures...\n",2); foreach $imgsrc (keys %images) { # print "Getting $imgsrc...\n"; # test if there is already image with the same name $imgsrc =~ s/"//g; $imgsrc =~ s/'//g; $imgsrc =~ s/src=//; $target = $imgsrc; $target =~ s/http:.*livejournal.com//; $target = $LOCAL_DIR . "/IMG/" . $target; # print "Putting it to $target...\n"; next if ($target =~ m{^http://}); next if (-f $target); my $targetdir = $target; $targetdir =~ s/\/[^\/]*$//; # get image if ($img = get_page($imgsrc)) { mkpath($targetdir, DEBUG_LEVEL, 0755); if (open (DF, ">$target")) { binmode DF; print DF $img; close DF; } else { logmsg("error opening $target for writing: $!\n",0); } } } } sub logmsg { my ($message, $loglvl) = @_; if (!defined $loglvl) { print $message; } else { warn $message if ($loglvl <= DEBUG_LEVEL); } }