Bivio::Test::HTMLParser::Links
# Copyright (c) 2002-2009 bivio Software, Inc. All Rights Reserved. # $Id$ package Bivio::Test::HTMLParser::Links; use strict; use Bivio::Base 'Test.HTMLParser'; use Bivio::IO::Trace; our($_TRACE); my($_IDI) = __PACKAGE__->instance_data_index; __PACKAGE__->register(['Cleaner']); sub html_parser_end { # (self, string, string) : undef # Dispatch to the _end_XXX routines. my($self, $tag) = @_; my($fields) = $self->[$_IDI]; pop(@{$fields->{xpath}}); return _end_a($self) if $tag eq 'a'; return; } sub html_parser_start { # (self, string, hash_ref, array_ref, string) : undef # Dispatches to the _start_XXX routines. my($self, $tag, $attr) = @_; my($fields) = $self->[$_IDI]; push(@{$fields->{xpath}}, $tag . ($attr->{class} ? ".$attr->{class}" : '')); return _start_a($fields, $attr) if $tag eq 'a'; return _start_img($self, $attr) if $tag eq 'img'; return; } sub html_parser_text { # (self, string) : undef # Text is applied to the current link, if any. # # For links, we can't assume that we are called with an entire sequence # of text (like Forms), so we append until the end_a. my($self, $text) = @_; my($fields) = $self->[$_IDI]; $text = $self->get('cleaner')->text($text); $fields->{text} .= $text if $fields->{href}; return; } sub new { # (proto, Test.HTMLParser) : HTMLParser.Links # Parses cleaned html for links. my($proto, $parser) = @_; my($self) = $proto->SUPER::new; $self->[$_IDI] = { xpath => [], }; return $self; } sub _end_a { # (self) : undef # No longer in a link. my($self) = @_; my($fields) = $self->[$_IDI]; _link($self, $fields->{text}) if defined($fields->{text}) && defined($fields->{href}); $fields->{href} = undef; return; } sub _link { # (self, string, string) : undef # Adds the link. Creates unique name ($label_$i) if not unique. my($self, $label, $alt) = @_; my($fields) = $self->[$_IDI]; my($base, $i) = $label; while ($self->get('elements')->{$label}) { return if $self->get('elements')->{$label}->{href} eq ($fields->{href} || ''); $label = $base . '_' . ++$i; } $self->get('elements')->{$label} = { label => $label, href => $fields->{href}, alt => $alt, }; _trace($label, '->', $fields->{href}) if $_TRACE; return; } sub _start_a { # (hash_ref, hash_ref) : undef # Stores the href. my($fields, $attr) = @_; Bivio::Die->die( 'already have an href (missing </a>). current=', $fields->{href}, ' new=', $attr->{href}, ) if $fields->{href}; return if $attr->{name} && !$attr->{href} # DropDown creates links that are meaningless for testing || $attr->{onclick} && ($attr->{href} || '') eq '#'; unless (defined($attr->{href}) || $attr->{name}) { b_info( join('/', @{$fields->{xpath}}), ': missing href or name, ignoring: ', $attr, ); return; } $fields->{href} = $attr->{href}; $fields->{text} = ''; return; } sub _start_img { # (self, hash_ref) : undef # Adds a new link. my($self, $attr) = @_; my($fields) = $self->[$_IDI]; return unless $fields->{href}; Bivio::Die->die('missing src: ', $attr) unless $attr->{src}; # Delete the gif/jpg suffix and any directory prefix $attr->{src} =~ s/(?:.*\/)?([^\/]+)\.\w+$/$1/; _link($self, $attr->{src}, $attr->{alt}); return; } 1;