# Copyright (c) 1999-2010 bivio Software, Inc. All rights reserved.
#
# Visit http://www.bivio.biz for more info.
#
# This library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation; either version 2.1 of the
# License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; If not, you may get a copy from:
# http://www.opensource.org/licenses/lgpl-license.html
#
# $Id: Query.pm,v 2.4 2011/11/05 23:43:16 nagler Exp $
package Bivio::Agent::HTTP::Query;
use strict;
use Bivio::Base 'Bivio.UNIVERSAL';
use Bivio::HTML;

our($VERSION) = sprintf('%d.%02d', q$Revision: 2.4 $ =~ /\d+/g);
my($_HTML) = b_use('Bivio.HTML');
my($_LQ) = b_use('SQL.ListQuery');
my($_U) = b_use('Bivio.UNIVERSAL');
# Action.Acknowledgement is loaded lazily, on first use in format().
my($_A);

sub format {
    # (proto, hash_ref, req) : string
    # Returns the string version of the query hash, formatted as
    # "key=value" pairs joined by "&", with keys and values passed
    # through escape_query.
    #
    # Returns undef if the query is false.  Also returns undef if the
    # query is empty once its "acknowledgement" key has been removed.
    #
    # Keys of the form "ListQuery.<name>" have the prefix stripped and
    # the remainder translated with SQL.ListQuery's to_char.
    #
    # NOTE: an "acknowledgement" key is deleted from the caller's hash
    # and handed to Action.Acknowledgement->save_label.
    my(undef, $query, $req) = @_;
    return undef
        unless $query;
    if (exists($query->{acknowledgement})) {
        ($_A ||= b_use('Action.Acknowledgement'))
            ->save_label(delete($query->{acknowledgement}), $req, $query);
        return undef
            unless %$query;
    }
    my($res) = '';
    # Always format the keys in the same order
    foreach my $k (sort(keys(%$query))) {
        my($v) = $query->{$k};
        $k = $_LQ->to_char($k)
            if $k =~ s/^ListQuery\.//;
        $res .= $_HTML->escape_query($k)
            . '='
            . $_HTML->escape_query(
                ref($v)
                    # Blessed values that know how to render themselves
                    # are formatted with as_query.
                    ? $_U->is_blessed($v) && $v->can('as_query')
                        ? $v->as_query
                    # Other references are only stringified for test
                    # requests.  $req may be undef, so guard the method
                    # call to reach the b_die diagnostic rather than a
                    # "Can't call method on undefined value" error.
                    : $req && $req->isa('Bivio::Test::Request')
                        ? "$v"
                    : b_die($k, '=', $v, ': query value is a reference')
                # Sometimes the query value is not defined.  It may
                # be a corrupt query, but shouldn't blow up.
                : defined($v) ? $v : '',
            )
            . '&';
    }
    # Remove the trailing "&" (no-op when the result is empty).
    chop($res);
    return $res;
}

sub parse {
    # (proto, string) : hash_ref
    # Returns a hash_ref for the query string.  Returns undef if the
    # string is not defined or contains no valid "key=value" elements.
    # If a key appears more than once, the last value wins.
    my(undef, $string) = @_;
    # Empty?
    return undef
        unless defined($string);
    # Some search engines escape the query string incorrectly.
    # /pub/trez_talk/msg?v=1%26t=332800003%26o=0d1a2a
    $string = _correct('unescape_uri', $string)
        if $string =~ /^(?:v=1%26|v%3d1)/i;
    # Some search engines don't unescape_html when parsing the page
    # /pub/trez_talk/msg?v=1&amp;t=292100003&amp;o=0d1a2a
    # The pattern must include the literal "&amp;": a bare "&" followed
    # by a one-character key is a perfectly normal query and must not
    # be treated (and logged) as corrupt.
    $string = _correct('unescape', $string)
        if $string =~ /&amp;\w=/;
    # Split on & and then =
    my(@v);
    foreach my $item (split(/&/, $string)) {
        # While it isn't usual to have a query value with = literally,
        # it can happen and therefore we have the "2".
        my($k, $v) = split(/=/, $item, 2);
        # Avoid the lone "&=" case.  Totally mangled query element.
        next
            unless defined($k) && length($k);
        # $v may not be defined.  This is a malformed query, but
        # let's handle anyway.
        push(
            @v,
            $_HTML->unescape_query($k),
            defined($v) ? $_HTML->unescape_query($v) : undef,
        );
    }
    # No valid elements?
    return undef
        unless @v;
    # Return the hash
    return {@v};
}

sub _correct {
    # (string, string) : string
    # Corrects the query string by calling the named Bivio.HTML method
    # ($method) on $literal and returning the result.  Always logs a
    # warning with the original query; if there is a current request,
    # the uri, referer, client_addr and user-agent are added to the
    # message to help identify the offending client.
    my($method, $literal) = @_;
    my(@msg) = ('correcting query=', $literal);
    my($req) = Bivio::Agent::Request->get_current;
    if ($req) {
        # "r" may be false, in which case the header values are logged
        # as undef.
        my($r) = $req->get('r');
        push(
            @msg,
            ', uri=', $req->unsafe_get('uri'),
            ', referer=', $r ? $r->header_in('referer') : undef,
            ', client_addr=', $req->unsafe_get('client_addr'),
            ', user-agent=', $r ? $r->header_in('user-agent') : undef,
        );
    }
    Bivio::IO::Alert->warn(@msg);
    return $_HTML->$method($literal);
}

1;