Bivio::Test::HTMLParser::Forms
# Copyright (c) 2002-2012 bivio Software, Inc. All Rights Reserved. # $Id$ package Bivio::Test::HTMLParser::Forms; use strict; use Bivio::Base 'Test.HTMLParser'; b_use('IO.Trace'); b_use('IO.ClassLoaderAUTOLOAD'); # C<Bivio::Test::HTMLParser::Forms> models the forms on a page. our($_TRACE); my($_IDI) = __PACKAGE__->instance_data_index; my($_HTML) = b_use('Bivio.HTML'); my($_F) = b_use('UI.Facade'); my($_R) = b_use('IO.Ref'); b_use('IO.Config')->register(my $_CFG = { error_color => '#993300', # Set by XHTMLWidget.FormFieldError error_class => 'field_err', error_title_class => 'err_title', disable_checkbox_heading => {}, }); __PACKAGE__->register(['Cleaner']); sub get_by_field_names { my($self, @name) = @_; # Returns the form data by finding by I<name>(s) in visible and submit # fields of forms. See interpretation of I<name> in # L<unsafe_get_field|"unsafe_get_field">. my($form) = shift->unsafe_get_by_field_names(@_); return $form if $form; b_die(\@name, ': no form matches named fields; all visible form fields: ', map({[sort(keys(%{$_->{visible}}), keys(%{$_->{submit}}))]} values(%{$self->get_shallow_copy}))); } sub get_field { my($proto, $form, $name) = @_; # Calls L<unsafe_get_field|"unsafe_get_field"> and dies unless matches # fields exactly. my($res) = shift->unsafe_get_field(@_); b_die($name, ': ', (@$res ? ('matches too many fields: ', @$res) : 'field not found'), ' in ', $form->{label}) unless @$res == 1; return $res->[0]; } sub get_ok_button { my($self, $form) = @_; # Returns name of ok_button. There cannot be more than one button, # excluding cancel. If there are no submit buttons, returns undef. $form = $self->get_by_field_names($form) unless ref($form) eq 'HASH'; my(@ok) = grep(!/cancel/i, keys(%{$form->{submit}})); b_die('must not be more than one submit ', \@ok) if @ok > 1; return $ok[0]; } sub handle_config { my(undef, $cfg) = @_; # error_class : string [form_field_error] # # unique class for error text on page. If found, assumes form failed. # # error_color : string [#990000] # # unique color for error text on page. If found, assumes form failed. # # disable_checkbox_heading : hash_ref {} # # Controls whether a checkbox in a table uses the table heading as a # label, or if the label is parsed from nearby text. # The key is the name of the table heading. $_CFG = $cfg; return; } sub html_parser_end { my($self, $tag) = @_; # Dispatch to the _end_XXX routines. my($fields) = $self->[$_IDI]; return _end_label($fields) if $tag eq 'label'; return _end_button($self, $fields) if $tag eq 'button'; return _end_th($fields) if $tag eq 'th'; return _end_table($fields) if $tag eq 'table'; return _end_form($self) if $tag eq 'form'; return _end_textarea($fields) if $tag eq 'textarea'; return _end_select($fields) if $tag eq 'select'; return _end_maybe_err($fields, $tag) if $tag =~ /^(font|span|div)$/ || ($tag eq 'ul' && $_F->is_2014style); return; } sub html_parser_start { my($self, $tag, $attr) = @_; # Calls _fixup_attr then dispatches to the _start_XXX routines. my($fields) = $self->[$_IDI]; _fixup_attr($tag, $attr); if (($attr->{class} || '') eq 'label') { $fields->{text} = undef; } return _start_label($fields, $attr) if $tag eq 'label'; return _start_button($fields, $attr) if $tag eq 'button'; return _start_tx($fields, $attr, $tag) if $tag =~ /^t(?:d|r|h|able)$/; return _start_form($fields, $attr) if $tag eq 'form'; return _start_option($fields, $attr) if $tag eq 'option'; return _start_input($self, $attr) if $tag eq 'input' || ($attr->{type} && $tag !~ /^(?:link|style|script)$/); return _start_maybe_err($fields, $tag, $attr) if $tag =~ /^(font|span|div)$/ || ($tag eq 'ul' && $_F->is_2014style); return; } sub html_parser_text { my($self, $text) = @_; $text = $self->get('cleaner')->text($text); # Text is applied as labels to form fields. It is cleaned first. # In the case of textareas, the cleaned text is saved as the "value" # (see _end_textarea). # # We assume that we can parse an entire sequence of text for # column headers, etc. my($fields) = $self->[$_IDI]; if ($fields->{textarea}) { $fields->{textarea}->{value} .= defined($text) ? $_HTML->unescape($text) : ''; return; } $text = $self->get('cleaner')->text($text); # We never label fields with blanks. There are occassions where blanks # are upcalled just after the actual text. # Select widgets may have an empty value. return unless length($text) || $fields->{option}; $fields->{text} .= $text . ($fields->{in_label} ? ':' : ''); return if _have_prefix_label($fields); return _label_option($fields) if $fields->{option} || $fields->{radio}; return _label_visible($fields) if $fields->{input}; return; } sub new { my($proto, $parser) = @_; # Parses cleaned html for forms. my($self) = $proto->SUPER::new; $self->[$_IDI] = { is_not_bivio_html => $parser->is_not_bivio_html, }; return $self; } sub unsafe_get_by_field_names { my($self, @name) = @_; # Look for form by field names my($found); my($forms) = $self->get_shallow_copy; FORM: while (my($form, $values) = each(%$forms)) { foreach my $n (@name) { next FORM unless @{$self->unsafe_get_field($values, $n)}; } b_die(\@name, ': too many forms matched fields') if $found; $found = $values; } return $found; } sub unsafe_get_field { my(undef, $form, $name) = @_; # Returns field from I<form>. I<name> may be a string or a regular # expression. If it is a string and it matches '(?.*)', I<name> will be # treated as a regular expression. # Returns list of matches or empty array_ref. $name = $name =~ /^\(\?.*\)$/s ? qr/$name/ : qr/^\Q$name\E$/s unless ref($name); my($res) = []; foreach my $c (qw(visible submit hidden)) { push( @$res, map( $form->{$c}->{$_}, grep($_ =~ $name, keys(%{$form->{$c}})), ), ); } return $res; } sub _empty { # Returns true if !defined or zero length return !grep(defined($_) && length($_), @_); } sub _end_button { my($self, $fields) = @_; if ($fields->{text}) { $fields->{button}->{value} = $fields->{text}; _label_submit($self, $fields->{button}); } $fields->{button} = undef; return; } sub _end_form { my($self) = @_; # Ends the form and puts in $fields->{current}. my($fields) = $self->[$_IDI]; if (!$fields->{is_not_bivio_html} && $fields->{input}) { my($attr) = $fields->{input}; delete($fields->{input}); $attr->{label} ||= $attr->{name}; _label_field($fields, 'visible', $attr); } _unwind_duplicates($fields); my($label) = $fields->{current}->{label}; my($curr) = $fields->{current}; $fields->{current} = undef; if (defined($label)) { my($e) = $self->get('elements'); # If there is a complete duplicate, then we ignore. if ($e->{$label}) { if ($_R->nested_equals($e->{$label}, $curr)) { _trace('ignoring duplicate form: ', $curr) if $_TRACE; return; } # Rename first form my($new_label) = "$label#0"; $e->{$new_label} = $e->{$label}; $e->{$new_label}->{label} = $label; $e->{$label} = undef; } for (my $i = 0; $e->{"$label#$i"};) { $curr->{label} = $label . '#' . ++$i; } $self->get('elements')->{$curr->{label}} = $curr; } _trace($_R->to_string($curr)) if $_TRACE; return; } sub _end_label { my($fields) = @_; $fields->{in_label} = 0; return; } sub _end_maybe_err { my($fields, $tag) = @_; return 0 if $fields->{is_not_bivio_html}; return 0 if $_F->is_2014style && ! $fields->{in_error_ul}; # Ends the current tag which may contain err. my($f) = pop(@{$fields->{maybe_err}}); $fields->{current}->{error_title_seen}++ if $f->{class} && $f->{class} eq $_CFG->{error_title_class}; return unless ( $tag eq 'ul' || ( $f->{color} ? $f->{color} eq $_CFG->{error_color} : $f->{class} ? $f->{class} eq $_CFG->{error_class} : $tag eq 'ul' ) ) && !_empty($fields->{text}) && !_have_prefix_label($fields); $fields->{input_error} = substr($fields->{text}, $f->{text_start_length}); $fields->{text} = undef; if ($tag eq 'ul') { $fields->{in_error_ul} = 0; } return; } sub _end_select { my($fields) = @_; $fields->{select} = undef; _leftover_input($fields) if $fields->{input}; return; } sub _end_table { my($fields) = @_; return if $fields->{is_not_bivio_html}; # The only tables we track are "data" tables. $fields->{in_data_table}-- if $fields->{in_data_table}; return; } sub _end_textarea { my($fields) = @_; # Saves the value for the textarea. May not have "text", because # textarea might be blank. _trace($fields->{textarea}) if $_TRACE; $fields->{textarea}->{value} = '' unless defined($fields->{textarea}->{value}); $fields->{textarea} = undef; return; } sub _end_th { my($fields) = @_; return if $fields->{is_not_bivio_html}; # Ends the "th". # There's a weird case where {text} will be the empty string, # but that's ok in this case. $fields->{text} = ' ' if _empty($fields->{text}); push(@{$fields->{headers}}, _text($fields)); _trace('push header ', $fields->{headers}->[$#{$fields->{headers}}]) if $_TRACE; return; } sub _fixup_attr { my($tag, $attr) = @_; # Lowercases all attr values which we care about. Sets type # for select and textarea. while (my($k, $v) = each(%$attr)) { $attr->{$k} = lc($v) if $k =~ /^(?:method|type)$/; } $attr->{type} = $tag if $tag =~ /^(?:select|textarea)$/; $attr->{type} = 'submit' if $tag eq 'input' && $attr->{type} && $attr->{type} eq 'image'; # HTML::Parser sets these values to "checked" or "selected" $attr->{selected} = $attr->{checked} = 1 if $attr->{checked}; $attr->{selected} = 1 if $attr->{selected}; return; } sub _have_prefix_label { my($fields) = @_; return 0 if $fields->{is_not_bivio_html}; # Returns true if $fields->{text} is a prefix label (ends with colon) return $fields->{text} && $fields->{text} =~ /:$/; } sub _label_field { my($fields, $class, $attr) = @_; # Labels all fields, checking for duplicates. Allows _radio # for labels, however. _trace($attr) if $_TRACE; return unless $fields->{current}->{$class} && $attr->{label}; push(@{$fields->{current}->{$class}->{$attr->{label}} ||= []}, $attr); _trace($fields->{current}, ' ', $attr); $fields->{current}->{label} = $attr->{label} unless $fields->{current}->{label} || $attr->{label} =~ /^_anon/ || $class eq 'hidden'; return; } sub _label_option { my($fields) = @_; # Labels the option and adds to select or radio. We label the select # (not the radio) if it hasn't already been labeled and the option is # selected. This handles the Select Site, Select Investment, etc. cases. my($which) = $fields->{radio} ? 'radio' : 'option'; my($group) = $fields->{radio} ? $fields->{radios}->{$fields->{radio}->{name}} : $fields->{select}; my($o) = $fields->{$which}; $o->{label} = _text($fields, $fields->{option} ? 1 : 0); _trace($o) if $_TRACE; b_die('duplicate ', $which, ': ', $o, ' select: ', $group) if $group->{options}->{$o->{label}}; $group->{options}->{$o->{label}} = $o; $group->{value} = $o->{value} if $o->{selected} || !defined($group->{value}); if ($fields->{option} && !defined($fields->{select}->{label})) { my($x) = $o->{value}; $fields->{text} = (!$x && $o->{label}) || $o->{label} =~ /^(?:select|choose)\s/i ? $o->{label} : '_anon'; _label_visible($fields); } $fields->{$which} = undef; return; } sub _label_radio { my($fields) = @_; # Labels a radio button and puts it in the appropriate radio hash. my($r) = $fields->{radio}; $r->{label} = _text($fields); $fields->{input} = $fields->{radio}; return; } sub _label_submit { my($self, $attr) = @_; # Labels the submit fields. my($fields) = $self->[$_IDI]; $attr->{label} = $self->get('cleaner')->text( $attr->{src} ? _submit_label_clean($attr->{src}) : $attr->{value}); $attr->{label} .= '_'.$attr->{index} if defined($attr->{index}); _label_field($fields, 'submit', $attr); if ($fields->{input_error}) { $attr->{error} = $fields->{input_error}; push(@{$fields->{current}->{errors} ||= []}, $attr); $fields->{input_error} = undef; } return; } sub _label_visible { my($fields) = @_; # Labels the current input field. my($label) = _text($fields); # We don't label selects with blanks. Rather with the selected value. if ($fields->{is_not_bivio_html} && $fields->{input}->{type} ne 'radio') { #FIX return $fields->{input} = undef if $fields->{input}->{label}; $label = $fields->{input}->{name}; } return if !length($label) && $fields->{input}->{type} eq 'select'; $fields->{input}->{label} = $label; _label_field($fields, 'visible', $fields->{input}); if ($fields->{input_error}) { $fields->{input}->{error} = $fields->{input_error}; push(@{$fields->{current}->{errors} ||= []}, $fields->{input}); $fields->{input_error} = undef; } $fields->{input} = undef; return; } sub _label_with_name { my($fields, $attr) = @_; # Labels the hidden fields. $attr->{label} = $attr->{name}; _label_field($fields, $attr->{type} eq 'hidden' ? 'hidden' : 'visible', $attr); return; } sub _leftover_input { my($fields) = @_; # Left over input field at start of new input. Anonymous field. Save # context for next field. my(@save) = @{$fields}{qw{text prev_cell_text}}; $fields->{text} = '_anon'; _label_visible($fields); @{$fields}{qw{text prev_cell_text}} = @save; return; } sub _start_button { my($fields, $attr) = @_; _leftover_input($fields) if $fields->{input}; $attr->{index} = $1 if $attr->{name} && $attr->{name} =~ /_(\d+)$/; $fields->{text} = undef; $fields->{button} = $attr; return; } sub _start_form { my($fields, $attr) = @_; # Starts a form. $fields->{current} = { %$attr, visible => {}, hidden => {}, submit => {}, }; $fields->{text} = $fields->{prev_cell_text} = undef; $fields->{radios} = {}; _trace($fields->{current}) if $_TRACE; return; } sub _start_input { my($self, $attr) = @_; # Starts a new field. Certain fields have labels before. Others # have labels after. Some have labels as the column header. $attr->{type} ||= 'text'; my($fields) = $self->[$_IDI]; unless ($fields->{is_not_bivio_html}) { _trace($fields->{text}, ' ', $fields->{input}, ' ', $fields->{prev_cell_text}, ' ', $attr) if $_TRACE; _leftover_input($fields) if $fields->{input}; return _label_with_name($fields, $attr) if $attr->{type} eq 'hidden'; # If a ListForm field, we grab the index from the header. $attr->{index} = $1 if $attr->{name} && $attr->{name} =~ /_(\d+)$/; } return _label_submit($self, $attr) if $attr->{type} eq 'submit'; # visible field $fields->{input} = $attr; # Text areas and select are special $fields->{$attr->{type}} = $attr if $attr->{type} =~ /^(?:select|textarea)$/; return _label_with_name($fields, $attr) if $fields->{is_not_bivio_html} && $attr->{type} ne 'option'; return _start_radio($fields) if $attr->{type} eq 'radio'; # Visible list form field is labeled with the header # if there is one. #TODO: Deal with the case when no header and not a checkbox if (defined($attr->{index}) && $fields->{headers} && defined($fields->{headers}->[$fields->{cell_num}])) { return if $attr->{type} =~ /checkbox/ && $_CFG->{disable_checkbox_heading}->{ $fields->{headers}->[$fields->{cell_num}]}; $fields->{text} = $fields->{headers}->[$fields->{cell_num}] . '_' . $attr->{index}; return _label_visible($fields); } # Nothing to label unless defined return if _empty($fields->{text}, $fields->{prev_cell_text}); # A field has a label if the word preceding it ends with a ':' return _label_visible($fields) if ($fields->{text} || $fields->{prev_cell_text}) =~ /\:\s*$/ && $attr->{type} !~ /checkbox/; # Unlabeled field. Will be dealt with on closing tag or next text return; } sub _start_label { my($fields, $attr) = @_; $fields->{text} = undef; $fields->{in_label} = 1 unless $attr->{for}; return; } sub _start_maybe_err { my($fields, $tag, $attr) = @_; return if $fields->{is_not_bivio_html}; # Saves current tag info. if (($fields->{text} || '') =~ m/:$/) { $fields->{prev_cell_text} = $fields->{text}; $fields->{text} = undef; } push(@{$fields->{maybe_err}}, { %$attr, text_start_length => length($fields->{text} || ''), }); if ($tag eq 'ul' && ($attr->{class} || '') eq $_CFG->{error_class}) { $fields->{in_error_ul} = 1; } return; } sub _start_option { my($fields, $attr) = @_; # Handles an OPTION tag. b_die('not in a select: ', $fields) unless $fields->{select}; $fields->{text} = undef; $fields->{select}->{options} ||= {}; $fields->{option} = $attr; return; } sub _start_radio { my($fields) = @_; # Deals with a new radio. We may have to start a new record in # $fields->{radios}. All radios are labelled non-uniquely "_radio". my($r) = $fields->{radio} = $fields->{input}; $fields->{input} = undef; return if $fields->{radios}->{$r->{name}}; # Save state in both radios and input. Then label the field. $fields->{input} = $fields->{radios}->{$r->{name}} = { name => $r->{name}, options => {}, type => 'radio', }; $fields->{text} = '_radio'; _label_visible($fields); return; } sub _start_tx { my($fields, $attr, $tag) = @_; return if $fields->{is_not_bivio_html}; # Starts a TD, TH, TR, or TABLE. if ($tag =~ /th|td/) { $fields->{prev_cell_text} = $fields->{text} if defined($fields->{text}) && length($fields->{text}); $fields->{text} = undef; } if ($fields->{in_data_table}) { return $fields->{in_data_table}++ if $tag eq 'table'; # Only count the top level rows if ($fields->{in_data_table} == 1) { return $fields->{cell_num} = -1 if $tag eq 'tr'; return $fields->{cell_num}++ if $tag =~ /td|th/; } } elsif ($tag eq 'th') { _trace('begin data table') if $_TRACE; $fields->{headers} = []; $fields->{cell_num} = 0; return $fields->{in_data_table} = 1; } return; } sub _submit_label_clean { my($src) = @_; # Grabs icon name. $src = Type_CacheTagFilePath()->to_untagged_path($src); $src =~ /(?:.*\/)?([^\/]+)\.\w+$/; return $1; } sub _text { my($fields, $no_die) = @_; # Returns the text field or dies if zero length. Won't die if !$no_die. my($res) = !_empty($fields->{text}) ? $fields->{text} : !_empty($fields->{prev_cell_text}) ? $fields->{prev_cell_text} : $no_die ? '' : b_die('no text field: ', $fields); $fields->{prev_cell_text} = $fields->{text} = undef; return $res; } sub _unwind_duplicates { my($fields) = @_; # Renames duplicates and unwinds singletons. foreach my $class (qw(visible hidden submit)) { my($c) = $fields->{current}->{$class}; foreach my $k (keys(%$c)) { my($found) = $c->{$k}; my($unique) = []; if (@$found == 1) { $c->{$k} = $found->[0]; next; } # If all values are identical, only save one if (grep($_R->nested_equals($found->[0], $_), @$found) == @$found) { _trace('all duplicates ', $k) if $_TRACE; $c->{$k} = {%{$found->[0]}, label => $k}; next; } # Different values, so put in # nams delete($c->{$k}); my($i) = 0; foreach my $v (@$found) { $c->{$v->{label} = $k . '#' . $i++} = $v; _trace('relabeled ', $v) if $_TRACE; } } } return; } 1;