Bivio::Type::DisplayName
# Copyright (c) 2006 bivio Software, Inc. All Rights Reserved. # $Id$ package Bivio::Type::DisplayName; use strict; use Bivio::Base 'Type.Line'; my($_N) = b_use('Type.Name'); my($_WIDTH) = b_use('Type.Text64K')->get_width; sub from_names { my($proto, @names) = @_; b_die('must provide at least one name') if @names < 1; b_die('must provide at most three names') if @names > 3; my($dn) = join(' ', @names); $dn =~ s/\s+/ /g; $dn =~ s/^\s+|\s+$//g; return $dn; } sub get_width { return $_WIDTH; } sub parse_to_names { my(undef, $display_name) = @_; # Returns a hash_ref of first_name, middle_name, last_name parsed from the # display_name. Returns L<Bivio::TypeError|Bivio::TypeError> if there # was a syntax error while parsing. # Clean up display_name (add spaces after commas, non-suffix periods, # remove extra spaces). my($dn) = $display_name; $dn =~ s/,(\S)/, $1/g; $dn =~ s/\.(.*\s)/. $1/g; # Split on spaces; reorder and remove comma if entered backwards # Only works if comma after first word (eg: 'la salle, jane' fails) my(@parts) = split(' ', $dn); return Bivio::TypeError->UNSPECIFIED unless scalar(@parts); if($parts[0] =~ /,$/) { $parts[0] =~ s/,//; push(@parts, shift(@parts)); } # Parse by priority. There is always a last name (unless format is a & b) my($name) = { map(($_ . '_name' => ''), qw(last first middle)), }; _parse_last($name, \@parts); _parse_first($name, \@parts); _parse_middle($name, \@parts); my($total) = 0; foreach my $part (keys(%$name)) { return Bivio::TypeError->from_name(uc($part).'_LENGTH') if defined($name->{$part}) && length($name->{$part}) > Bivio::Type::Name->get_width; $total += length($name->{$part}); } return Bivio::TypeError->NULL unless $total; return $name; } sub _is_conjunction { my($str) = @_; # Returns 1 if $str matches a conjunction return 1 if ($str =~ /^and$|^&$/i); return 0; } sub _parse_first { my($name, $parts) = @_; # Sets the prefix (if applicable) and the firstname. # Catches (with or without periods): # Mr, Mrs, Sir, Dr, Miss, Rev, Ms, etc. # Names joined by 'and' or '&' are considered both firstnames components, # unless there is no last name given. return unless int (@$parts)>0; my($first) = shift(@$parts); if ($first =~ /^(mr\.?|mrs\.?|sir|dr\.?|miss|rev\.?|ms\.?)$/i && @$parts) { $name->{first_name} = $first.' '.shift(@$parts); } else { $name->{first_name} = $first; } return unless @$parts; if(_is_conjunction($parts->[0])) { $name->{first_name} .= ' '.shift(@$parts).' '; $name->{first_name} .= shift(@$parts) if @$parts; } # Remove trailing space if exists $name->{first_name} =~ s/\s$//; return; } sub _parse_last { my($name, $parts) = @_; # Sets the suffix (if applicable) and the surname (including patronymic). # Catches anything with a period in the last place as a suffix. Also detects # certain common suffixes without periods, ie: # Sr, Jr, PhD, JD, MD, I, II, III, IV, 1st, 2nd, 3rd, etc. # Removes commas if present before adding them. # Returns if second to last word is a form of '&' (both stored as first name) return if @$parts > 2 && _is_conjunction($parts->[-2]); my($last) = pop(@$parts); $name->{last_name} = $last; if (scalar(@$parts)) { while (@$parts && ($last =~ /^(sr|jr|phd|dvm|jd|md|dds|pe|I|IV|V|\d..)$|\.|^I{2,}/i || ((@$parts)[-1] && (@$parts)[-1] =~ /\,$/))) { $last = pop(@$parts); $name->{last_name} = $last . ($last =~ /\,$/ ? ' ' : ', ') . $name->{last_name}; } } return unless scalar(@$parts); # Check for patronymics in last place: van, von, de la if ($parts->[-1] =~ /^(van|von|la|de|du)$/i) { my($patr) = pop(@$parts); $name->{last_name} = $patr.' '.$name->{last_name}; return unless defined($parts->[0]); if ($parts->[-1] =~ /^de$/i) { my($de) = pop(@$parts); $name->{last_name} = $de.' '.$name->{last_name}; } } return; } sub _parse_middle { my($name, $parts) = @_; # Checks for leftover parts and stores them as the middle name. return unless @$parts; $name->{middle_name} = join(' ', @$parts); return; } 1;