# $Id: QRSS.pm,v 1.3 2004/11/29 09:10:32 fuseda Exp $ # simple RSS make package from XML::RSS.pm # original: XML::RSS.pm # Original code: Jonathan Eisenzopf # Further changes: Rael Dornfest # customize: QRSS.pm # qux package QRSS; use strict; use Carp; use vars qw($VERSION $modules $AUTOLOAD); my $modules = { 'syn' => 'http://purl.org/rss/1.0/modules/syndication/', 'dc' => 'http://purl.org/dc/elements/1.1/', 'taxo' => 'http://purl.org/rss/1.0/modules/taxonomy/', 'admin' => 'http://webns.net/mvcb/', 'content' => 'http://purl.org/rss/1.0/modules/content/', }; my %v1_0_ok_fields = ( channel => { title => '', description => '', link => '', }, image => { title => '', url => '', link => '' }, items => [], version => '', encoding => '', output => '', ); # constuctor sub new { my $invocant = shift; my $class = ref($invocant) || $invocant; my $self = {}; bless( $self, $class ); $self->_initialize ( @_ # and overwrite ); return $self; } sub _initialize { my $self = shift; my %hash = @_; $self->{items} = []; $self->{modules} = (); $self->{version} = $hash{version} || '1.0'; $self->{encoding} = $hash{encoding} || 'UTF-8'; foreach my $i (qw(channel image)) { my %template = %{$v1_0_ok_fields{$i}}; $self->{$i} = \%template; } } # methods # add sub add_item { my $self = shift; my $hash = {@_}; # add the item to the list push (@{$self->{items}}, $hash); # return reference to the list of items return $self->{items}; } sub add_module { my $self = shift; my $hash = {@_}; $hash->{prefix} =~ /^[a-z_][a-z0-9.-_]*$/ or die "a namespace prefix should look like [a-z_][a-z0-9.-_]*"; my $uri = $hash->{uri} || $$modules{$hash->{prefix}}; $uri or die "a URI must be provided in a namespace declaration for '$hash->{prefix}'"; $self->{modules}->{$uri} = $hash->{prefix}; } sub add_stylesheet { my $self = shift; my $stylesheet = shift; push @{$self->{stylesheets}}, $stylesheet; } # RSS 1.0 output sub as_rss_1_0 { my $self = shift; my $output; # XML declaration $output .= '{encoding}.'"?>'."\n\n"; # stylesheet declaration foreach my $sheet ( @{$self->{stylesheets}} ) { $output .= '\n"; } # RDF namespaces declaration $output .="{modules}}) { $output.=" xmlns:$v=\"$k\"\n"; } $output .=">"."\n\n"; ################### # Channel Element # ################### unless ( defined($self->{channel}->{'about'}) ) { $output .= ''."\n"; } else { $output .= ''."\n"; } # title ( required , suggested maximum length:40 ) $output .= ''. $self->encode($self->{channel}->{title}) .''."\n"; # link ( required , suggested maximum length:500 ) $output .= ''. $self->encode($self->{channel}->{'link'}) .''."\n"; # description ( required , suggested maximum length:500 ) $output .= ''. $self->encode($self->{channel}->{description}) .''."\n"; # モジュール依存ヘッダー # Ad-hoc modules while ( my($url, $prefix) = each %{$self->{modules}} ) { while ( my($el, $value) = each %{$self->{channel}->{$prefix}} ) { $output .= "<$prefix:$el>". $self->encode($value) ."\n"; } } # Seq items ( required ) $output .= "\n \n"; foreach my $item (@{$self->{items}}) { my $about = ( defined($item->{'about'}) ) ? $item->{'about'} : $item->{'link'}; $output .= ' '."\n"; } $output .= " \n\n"; # image ( required only if image element present ) $self->{image}->{url} and $output .= ''."\n"; # end channel element $output .= ''."\n\n"; ################# # image element # ################# if ($self->{image}->{url}) { $output .= ''."\n"; # title $output .= ''. $self->encode($self->{image}->{title}) .''."\n"; # url $output .= ''. $self->encode($self->{image}->{url}) .''."\n"; # link $output .= ''. $self->encode($self->{image}->{'link'}) .''."\n" if $self->{image}->{link}; # dc and other ad-hoc modules # $output .= ''."\n\n"; } ################ # item element # ################ foreach my $item (@{$self->{items}}) { if ($item->{title}) { my $about = ( defined($item->{'about'}) ) ? $item->{'about'} : $item->{'link'}; $output .= 'encode($item->{title}) .''."\n"; $output .= ''. $self->encode($item->{'link'}) .''."\n"; $item->{description} and $output .= ''. $self->encode($item->{description}) .''."\n"; # Ad-hoc modules while ( my($url, $prefix) = each %{$self->{modules}} ) { while ( my($el, $value) = each %{$item->{$prefix}} ) { $output .= "<$prefix:$el>". $self->encode($value) ."\n"; } } # end item element $output .= ''."\n\n"; } } # end foreach my $item (@{$self->{items}}) { $output .= "\n"; } sub as_string { my $self = shift; my $version = $self->{version}; my $output; if ( $version eq '1.0' ) { $output = &as_rss_1_0($self); } return $output; } sub AUTOLOAD { my $self = shift; my $type = ref($self) || die "$self is not an object\n"; my $name = $AUTOLOAD; $name =~ s/.*://; return if $name eq 'DESTROY'; croak "Unregistered entity: Can't access $name field in object of class $type" unless (exists $self->{$name}); # return reference to RSS structure if (@_ == 1) { return $self->{$name}->{$_[0]} if defined $self->{$name}->{$_[0]}; # we're going to set values here } elsif (@_ > 1) { my %hash = @_; my $_REQ; # store data in object foreach my $key (keys(%hash)) { $self->{$name}->{$key} = $hash{$key}; } # return value return $self->{$name}; # otherwise, just return a reference to the whole thing } else { return $self->{$name}; } return 0; } # the code here is a minorly tweaked version of code from # Matts' rssmirror.pl script # my %entity = ( nbsp => " ", iexcl => "¡", cent => "¢", pound => "£", curren => "¤", yen => "¥", brvbar => "¦", sect => "§", uml => "¨", copy => "©", ordf => "ª", laquo => "«", not => "¬", shy => "­", reg => "®", macr => "¯", deg => "°", plusmn => "±", sup2 => "²", sup3 => "³", acute => "´", micro => "µ", para => "¶", middot => "·", cedil => "¸", sup1 => "¹", ordm => "º", raquo => "»", frac14 => "¼", frac12 => "½", frac34 => "¾", iquest => "¿", Agrave => "À", Aacute => "Á", Acirc => "Â", Atilde => "Ã", Auml => "Ä", Aring => "Å", AElig => "Æ", Ccedil => "Ç", Egrave => "È", Eacute => "É", Ecirc => "Ê", Euml => "Ë", Igrave => "Ì", Iacute => "Í", Icirc => "Î", Iuml => "Ï", ETH => "Ð", Ntilde => "Ñ", Ograve => "Ò", Oacute => "Ó", Ocirc => "Ô", Otilde => "Õ", Ouml => "Ö", times => "×", Oslash => "Ø", Ugrave => "Ù", Uacute => "Ú", Ucirc => "Û", Uuml => "Ü", Yacute => "Ý", THORN => "Þ", szlig => "ß", agrave => "à", aacute => "á", acirc => "â", atilde => "ã", auml => "ä", aring => "å", aelig => "æ", ccedil => "ç", egrave => "è", eacute => "é", ecirc => "ê", euml => "ë", igrave => "ì", iacute => "í", icirc => "î", iuml => "ï", eth => "ð", ntilde => "ñ", ograve => "ò", oacute => "ó", ocirc => "ô", otilde => "õ", ouml => "ö", divide => "÷", oslash => "ø", ugrave => "ù", uacute => "ú", ucirc => "û", uuml => "ü", yacute => "ý", thorn => "þ", yuml => "ÿ", ); my $entities = join('|', keys %entity); sub encode { my ($self, $text) = @_; return $text unless $self->{'encode_output'}; my $encoded_text = ''; while ( $text =~ s/(.*?)(\<\!\[CDATA\[.*?\]\]\>)//s ) { $encoded_text .= encode_text($1) . $2; } $encoded_text .= encode_text($text); return $encoded_text; } sub encode_text { my $text = shift; $text =~ s/&(?!(#[0-9]+|#x[0-9a-fA-F]+|\w+);)/&/g; $text =~ s/&($entities);/$entity{$1}/g; $text =~ s//>/g; return $text; } 1; __END__ =head1 NAME QRSS - creates RSS file =head1 SYNOPSIS # create an RSS 1.0 file (http://purl.org/rss/1.0/) use QRSS; my $rss = new QRSS (version => '1.0'); $rss->channel( title => "ham", link => "http://qux.hauN.org", description => "diary for ham", dc => { date => '2004-11-29T07:00+09:00', subject => "Diary", creator => 'qux_QRSS@hauN.org', publisher => 'qux_QRSS@hauN.org', rights => 'Copyright 2004, qux', language => 'ja', }, ); $rss->image( title => "ham", url => "http://qux.hauN.org/images/sivainu.jpg", link => "http://qux.hauN.org", dc => { creator => "QUX (graphics at qux.hauN.org)", }, ); $rss->add_item( title => "siva inu", link => "http://qux.haun.org/note0410.html#23-2", description => "cool dog ...", dc => { subject => "animals", creator => "qux", }, ); # Optionally mixing in elements of a non-standard module/namespace $rss->add_module(prefix=>'my', uri=>'http://purl.org/my/rss/module/'); $rss->add_item( title => "QRSS.pm", link => "http://qux.haun.org/QRSS.pm.txt", description => "QRSS is an RSS generate package...", my => { rating => "A+", category => "RSS", }, ); $rss->add_item (title=>$title, link=>$link, slash=>{ topic=>$topic }); =head1 DESCRIPTION This module provides a basic framework for creating and maintaining RDF Site Summary (RSS) files. This might be helpful if you want to include news feeds on your Web site from sources like Slashot and Freshmeat or if you want to syndicate your own content. QRSS currently support 1.0 version of RSS. See http://purl.org/rss/1.0/ for RSS 1.0. RSS was originally developed by Netscape as the format for Netscape Netcenter channels, however, many Web sites have since adopted it as a simple syndication format. With the advent of RSS 1.0, users are now able to syndication many different kinds of content including news headlines, threaded measages, products catalogs, etc. =head1 METHODS =over 4 =item new QRSS ( version => $version, encoding => $encoding ) Constructor for QRSS. It returns a reference to an QRSS object. You may also pass the RSS version and the XML encoding to use. The default B is 1.0. The default B is UTF-8. You may also specify the B format regarless of the input version. This comes in handy when you want to convert RSS between versions. The QRSS modules will convert between any of the formats. If you set QRSS will make sure to encode any entities in generated RSS. This is now on by default. =item add_item (title=>$title, link=>$link, description=>$desc) Adds an item to the QRSS object. B and B are optional. The default B is append, which adds the item to the end of the list. To insert an item, set the mode to B. The items are stored in the array @{$obj->{'items'}} where B<$obj> is a reference to an QRSS object. =item as_string; Returns a string containing the RSS for the QRSS object. This method will also encode special characters along the way. =item channel (title=>$title, link=>$link, description=>$desc, language=>$language, rating=>$rating, copyright=>$copyright, pubDate=>$pubDate, lastBuildDate=>$lastBuild, docs=>$docs, managingEditor=>$editor, webMaster=>$webMaster) Channel information is required in RSS. The B cannot be more the 40 characters, the B<link> 500, and the B<description> 500 when outputting RSS 0.9. B<title>, B<link>, and B<description>, are required for RSS 1.0. B<language> is required for RSS 0.91. The other parameters are optional for RSS 0.91 and 1.0. To retreive the values of the channel, pass the name of the value (title, link, or description) as the first and only argument like so: $title = channel('title'); =item image (title=>$title, url=>$url, link=>$link, width=>$width, height=>$height, description=>$desc) Adding an image is not required. B<url> is the URL of the image, B<link> is the URL the image is linked to. B<title>, B<url>, and B<link> parameters are required if you are going to use an image in your RSS file. The remaining image elements are used in RSS 0.91 or optionally imported into RSS 1.0 via the rss091 namespace. The method for retrieving the values for the image is the same as it is for B<channel()>. =item add_module(prefix=>$prefix, uri=>$uri) Adds a module namespace declaration to the QRSS object, allowing you to add modularity outside of the the standard RSS 1.0 modules. At present, the standard modules Dublin Core (dc) and Syndication (syn) are predefined for your convenience. The Taxonomy (taxo) module is also internally supported. The modules are stored in the hash %{$obj->{'modules'}} where B<$obj> is a reference to an QRSS object. If you want to automatically add modules that the parser finds in namespaces, set the $QRSS::AUTO_ADD variable to a true value. By default the value is false. =back =head2 RSS 1.0 MODULES XML-Namespace-based modularization affords RSS 1.0 compartmentalized extensibility. The only modules that ship "in the box" with RSS 1.0 are Dublin Core (http://purl.org/rss/1.0/modules/dc/), Syndication (http://purl.org/rss/1.0/modules/syndication/), and Taxonomy (http://purl.org/rss/1.0/modules/taxonomy/). Consult the appropriate module's documentation for further information. Adding items from these modules in QRSS is as simple as adding other attributes such as title, link, and description. The only difference is the compartmentalization of their key/value paris in a second-level hash. $rss->add_item (title=>$title, link=>$link, dc=>{ subject=>$subject, creator=>$creator }); For elements of the Dublin Core module, use the key 'dc'. For elements of the Syndication module, 'syn'. For elements of the Taxonomy module, 'taxo'. These are the prefixes used in the RSS XML document itself. They are associated with appropriate URI-based namespaces: syn: http://purl.org/rss/1.0/modules/syndication/ dc: http://purl.org/dc/elements/1.1/ taxo: http://purl.org/rss/1.0/modules/taxonomy/ Dublin Core elements may occur in channel, image, item(s), and textinput -- albeit uncomming to find them under image and textinput. Syndication elements are limited to the channel element. Taxonomy elements can occur in the channel or item elements. Access to module elements after parsing an RSS 1.0 document using QRSS is via either the prefix or namespace URI for your convenience. print $rss->{items}->[0]->{dc}->{subject}; or print $rss->{items}->[0]->{'http://purl.org/dc/elements/1.1/'}->{subject}; QRSS also has support for "non-standard" RSS 1.0 modularization at the channel, image, item, and textinput levels. Parsing an RSS document grabs any elements of other namespaces which might appear. QRSS also allows the inclusion of arbitrary namespaces and associated elements when building RSS documents. For example, to add elements of a made-up "My" module, first declare the namespace by associating a prefix with a URI: $rss->add_module(prefix=>'my', uri=>'http://purl.org/my/rss/module/'); Then proceed as usual: $rss->add_item (title=>$title, link=>$link, my=>{ rating=>$rating }); Non-standard namespaces are not, however, currently accessible via a simple prefix; access them via their namespace URL like so: print $rss->{items}->[0]->{'http://purl.org/my/rss/module/'}->{rating}; QRSS will continue to provide built-in support for standard RSS 1.0 modules as they appear. =head1 AUTHOR for QRSS qux <qux@hauN.org> for XML::RSS Original code: Jonathan Eisenzopf <eisen@pobox.com> Further changes: Rael Dornfest <rael@oreilly.com> Currently: perl-rss project (http://perl-rss.sourceforge.net) =head1 COPYRIGHT QRSS is free software. You can redistribute it and/or modify it under the same terms as Perl itself. =head1 SEE ALSO perl(1), =cut