Bivio::Test::HTMLParser
# Copyright (c) 2002-2004 bivio Software, Inc. All Rights Reserved. # $Id$ package Bivio::Test::HTMLParser; use strict; use Bivio::Base 'Collection.Attributes'; use Encode (); # C<Bivio::Test::HTMLParser> directs parsing of html by calling classes in the # TestHTMLParser class map. # # html : string # # The HTML which was passed to new # # E<lt>simple_classE<gt> : string # # Each parser class is put on I<self>. See parser classes for their attributes. my(@_CLASSES); b_use('IO.ClassLoader')->map_require_all('TestHTMLParser'); my($_HP) = b_use('Ext.HTMLParser'); sub html_parser_comment { return; } sub html_parser_end { return; } sub html_parser_start { return; } sub html_parser_text { return; } sub internal_new { # (proto, Test.HTMLParser) : Test.HTMLParser # Calls parser subclass to parse cleaned html. Subclass must implement # L<Bivio::Ext::HTMLParser|Bivio::Ext::HTMLParser> interface. Sets two # attributes: I<cleaner> and I<elements>. I<cleaner> is an instance of # C<Cleaner>, and I<elements> is a hash which will be put as the attributes of # I<self> when parsing is complete. my($proto, $parser) = @_; return undef unless my $self = $proto->new($parser); $self->internal_put({ cleaner => $parser->get('Cleaner'), elements => {}, }); my($p) = $_HP->new($self); $p->ignore_elements(qw(script style)); $p->parse($self->get('cleaner')->get('html')); $self->internal_put($self->get('elements')); return $self->set_read_only; } sub is_not_bivio_html { return shift->unsafe_get('is_not_bivio_html') ? 1 : 0; } sub new { # (proto, string_ref) : Test.HTMLParser # (proto, hash_ref) : Test.HTMLParser # Parse I<html> using registered parser classes. # # If I<html> is undef or I<attrs> is passed, does nothing (pass through # L<internal_new|"internal_new"> for subclasses). my($proto) = shift; return $proto->SUPER::new(@_) unless (ref($proto) || $proto) eq __PACKAGE__; my($html, $attrs) = @_; my($self) = $proto->SUPER::new({ %{$attrs || {}}, html => Encode::decode_utf8($$html), }); foreach my $c (@_CLASSES) { $self->put($c->simple_package_name => $c->internal_new($self)); } return $self->set_read_only; } sub register { # (proto, array_ref) : undef # Adds I<proto> to list of classes, but first loads I<prerequisite_classes>. my($proto, $prerequisite_classes) = @_; foreach my $p (@{$prerequisite_classes || []}) { b_use('TestHTMLParser', $p); } push(@_CLASSES, ref($proto) || $proto); return; } 1;