Bivio::Mail::Address
# Copyright (c) 2000-2008 bivio Software, Inc. All rights reserved.
# $Id$
package Bivio::Mail::Address;
use strict;
use Bivio::Base 'Bivio::UNIVERSAL';
my($_RFC) = __PACKAGE__->use('Mail.RFC822');
#TODO: Remove this
my($ATOM_ONLY_PHRASE) = $_RFC->ATOM_ONLY_PHRASE;
my($ATOM_ONLY_ADDR) = $_RFC->ATOM_ONLY_ADDR;
my($QUOTED_STRING) = $_RFC->QUOTED_STRING;
my($NOT_NESTED_COMMENT) = $_RFC->NOT_NESTED_COMMENT;
my($MAILBOX) = $_RFC->MAILBOX;
my($ADDR_SPEC) = $_RFC->ADDR_SPEC;
my($ROUTE_ADDR) = $_RFC->ROUTE_ADDR;
my($PHRASE) = $_RFC->PHRASE;
my($LOCAL_PART) = $_RFC->LOCAL_PART;
my($_E) = __PACKAGE__->use('Type.Email');
sub escape_comment {
my(undef, $comment) = @_;
$comment =~ s/(["\\])/\\$1/g;
return $comment;
}
sub parse {
# (proto, string) : array
# 822:
# For purposes of display, and when passing such struc-
# tured information to other systems, such as mail proto-
# col services, there must be NO linear-white-space
# between <word>s that are separated by period (".") or
# at-sign ("@") and exactly one SPACE between all other
# <word>s. Also, headers should be in a folded form.
#
# There is one type of bracket which must occur in matched pairs
# and may have pairs nested within each other:
#
# o Parentheses ("(" and ")") are used to indicate com-
# ments.
#
# There are three types of brackets which must occur in matched
# pairs, and which may NOT be nested:
#
# o Colon/semi-colon (":" and ";") are used in address
# specifications to indicate that the included list of
# addresses are to be treated as a group.
#
# o Angle brackets ("<" and ">") are generally used to
# indicate the presence of a one machine-usable refer-
# ence (e.g., delimiting mailboxes), possibly including
# source-routing to the machine.
#
# o Square brackets ("[" and "]") are used to indicate the
# presence of a domain-literal, which the appropriate
# name-domain is to use directly, bypassing normal
# name-resolution mechanisms.
#
# These appear after -----Original Message-----
# From: Jeffrey Richer [SMTP:jricher@inet.net]
# From: . <winsv@ix.netcom.com>
# From: <MNatto@aol.com>
# Probably part of Outlook. Not a problem for us as the "Original Message"
# is not an 822 thing.
#
# Parses the first address in the field. If there are multiple
# addresses, only grabs the first one.
#
# Returns an array (address, name) or (undef, undef) if the input
# could not be parse successfully.
my(undef, $addr) = @_;
my($REST) = '\s*(?:,\s*(.*)?)?$';
local($_) = $addr;
s/^\s+//s;
my($n, $a, $r);
# Cases are optimized by their statistical counts.
# Joe Bob <joe@bob.com>
if (($n, $a, $r) = /^($ATOM_ONLY_PHRASE)\s*\<($ATOM_ONLY_ADDR)\>$REST/os) {
return ($a, $n, $r);
}
# "Joe Bob" <joe@bob.com>
if (($n, $a, $r) = /^($QUOTED_STRING)\s*\<($ATOM_ONLY_ADDR)\>$REST/os) {
return ($a, _clean_quoted_string($n), $r);
}
# joe@bob.com -- grab first addr, not allowing comment
if (($a, $r) = m!^($ATOM_ONLY_ADDR)$REST!os) {
return ($a, undef, $r);
}
# joe@bob.com (Joe Bob)
if (($a, $n, $r) = m!^($ATOM_ONLY_ADDR)\s*($NOT_NESTED_COMMENT)$REST!os) {
return ($a, _clean_comment($n), $r);
}
if (($a, $n, $r) = /^($MAILBOX)\s*((?:$NOT_NESTED_COMMENT)*)$REST/os) {
#TODO: Need to make sure we hit 99.99% of addresses with this
# We don't handle groups. ok? What about "Undisclosed Recipients:;"?
# complex@addr (My comment) AND complex@addr
if ($a =~ /^$ADDR_SPEC$/os) {
# $a is an address, no further parsing necessary
return ($a, length($n) ? _clean_comment($n) : undef, $r);
}
#TODO: Die if $REST not empty?
# $MAILBOX: <complex@addr>
if (($a) = /^($ROUTE_ADDR)/) {
return (_clean_route_addr($a), undef, undef);
}
# $MAILBOX: My Comment <complex@addr>
if (($n, $a) = /^($PHRASE)\s+($ROUTE_ADDR)/) {
return (_clean_route_addr($a), $n, undef);
}
#TODO: error or assert_fail
b_die('regexps incorrect, cannot parse: ', $_);
}
# Local delivery: root
if (($a, $r) = m!^($LOCAL_PART)$REST!os) {
return ($a, undef, $r);
}
# Illegal implementations follow:
#
# PoorImpl.com <hackers@foo.com>
if (($n, $a, $r) = /^([^<>"]+)\s*\<($ATOM_ONLY_ADDR)\>$REST/os) {
$n =~ s/\s+$//;
return ($a, _clean_quoted_string(qq{"$n"}), $r);
}
b_warn('Unable to parse address: ', $_);
return (undef, undef, undef);
}
sub parse_list {
my($proto, $addr_list) = @_;
return []
unless $addr_list;
return []
if $addr_list =~ /^undisclosed-recipients:;$/i;
my($addrs) = [];
my($addr);
while (1) {
my($old_list) = $addr_list;
($addr, undef, $addr_list) = $proto->parse($addr_list);
b_die($old_list, ': invalid address')
unless $addr;
push(@$addrs, $addr);
last unless $addr_list;
b_die($old_list, ': parse() did not trim addr_list')
if length($addr_list) > length($old_list);
}
return $addrs;
}
sub parse_list_strict {
# Parse a string into a list of email addresses. Only literal email
# addresses are allowed--RFC822 comments and extensions are not
# supported.
#
# Errors will be pushed onto error_list, if present.
my($proto, $list_string, $error_list) = @_;
my($email_list) = [];
foreach my $email ($list_string =~ /([^\s,]+)/gs) {
my($parsed) = $_E->from_literal($email);
if ($parsed) {
push(@$email_list, $parsed);
}
elsif (ref($error_list) eq 'ARRAY') {
push(@$error_list, $email . ' is not a valid email address.');
}
}
return $email_list;
}
sub parse_local_part {
my(undef, $email) = @_;
return ($email =~ /(.+?)\@/)[0];
}
sub _clean_comment {
local($_) = @_;
s/^\(//s && s/\)$//s || Carp::cluck("not a comment: $_");
s/\\(.)/$1/gs;
return $_;
}
sub _clean_quoted_string {
local($_) = @_;
s/^\"//s && s/\"$//s || b_die("not a quoted string: $_");
s/\\(.)/$1/gs;
return $_;
}
sub _clean_route_addr {
local($_) = @_;
s/^\<//s && s/\>$//s || b_die("not a route address: $_");
return $_;
}
1;