###############################
# RssMaker -- Genera un alimentador
# RSS de una página web
# Mike Schilli, 2004
# (m@perlmeister.com)
###################################
package RssMaker;
use warnings;
use strict;
use LWP::UserAgent;
use HTTP::Request::Common;
use XML::RSS;
use HTML::Entities
qw(decode_entities);
use URI::URL;
use HTTP::Date;
use DateTime;
use HTML::TreeBuilder;
use Log::Log4perl qw(:easy);
#############################
sub make {
#############################
  my (%o) = @_;
  $o{url}
    || LOGDIE "url ausente";
  $o{title}
    || LOGDIE
  "titulo ausente";
  $o{output} ||= "out.rdf";
  $o{filter} ||= sub { 1 };
  $o{encoding} ||= 'utf-8';
  my $ua =
    LWP::UserAgent->new();
  INFO "Sacando $o{url}";
  my $resp =
    $ua->request(
    GET $o{url} );
  LOGDIE "Error sacando ",
      "$o{url}"
    if $resp->is_error();
  my $http_time =
    $resp->header(
    'last-modified');
  $http_time ||=
    time2str( time() );
  INFO "Ultima modificación: ",
    $http_time;
  my $mtime =
    str2time($http_time);
  my $isotime =
    DateTime->from_epoch(
    epoch => $mtime);
  DEBUG "Ultima modificación:",
    $isotime;
  my $rss =
    XML::RSS->new(
    encoding =>
      $o{encoding} );
  $rss->channel(
    title => $o{title},
    link => $o{url},
    dc => {
      date => $isotime . "Z"
    },
  );
  foreach(exlinks(
    $resp->content(),
    $o{url})) {

    my ($lurl, $text) = @$_;

    $text =
      decode_entities($text);

    if ($o{filter}->( 
      $lurl, $text)) {
    INFO "Adding rss ",
      "entry: $text $lurl";
    $rss->add_item(
      title => $text,  
      link => $lurl);
    }
  }
  INFO "Guardando la salida en ",
    "$o{output}";
  $rss->save( $o{output} )
    or LOGDIE "Cannot write",
      " to", " $o{output}";
}
#############################
sub exlinks {
#############################
  my ($html, $base_url) = @_;
  my @links = ();
  my $tree =
    HTML::TreeBuilder->new();

  $tree->parse($html)
    or return ();
  for(@{$tree->extract_links(
    'a')}) {
   my ($link, $element,
    $attr, $tag) = @$_;
  
  next
    unless $attr eq "href";

  my $uri =
    URI->new_abs( $link,
    $base_url );
  
  next
    unless length $element
    ->as_trimmed_text();
  
  push @links,
    [
    URI->new_abs(
      $link, $base_url
    ),
    $element
      ->as_trimmed_text()
    ];
  }

  return @links;
}

1;

@KT:Listado 2:bpo2rss 
@LI:#!/usr/bin/perl
 #############################
 # bpo2rss -- books.perl.org
 # generador de alimentador RSS 
 # Mike Schilli, 2004
 # (m@perlmeister.com)
 #############################
 use warnings;
 use strict;

 use RssMaker;
 use Log::Log4perl qw(:easy);

 Log::Log4perl->easy_init(
   $INFO);

 my $url =
   'http://books.perl.org/';

 RssMaker::make(
   url => $url,
   title => "books.perl.org",
   filter => sub {
     my ( $link, $text ) = @_;

   return 1
     if $link =~
       m#/book/\d+#;
   return 0;
   },
   output => "bpo.rss",
 ); 
