Bivio::IO::Config
# Copyright (c) 1999-2013 bivio Software, Inc. All rights reserved. # $Id$ package Bivio::IO::Config; use strict; use base 'Bivio::UNIVERSAL'; # This is the first module to initialize. Don't import anything that # might import other bivio modules. use File::Basename (); use File::Spec (); eval(q{ use Image::Size (); use HTML::Parser (); use MIME::Entity (); }) if $] > 5.008; # C<Bivio::IO::Config> is a simple configuration mechanism. A configuration file # is a hash_ref of packages and hash_refs. Each package's hash_ref contains # configuration name/value tuples. # # Modules are dynamically configured in the order they are initialized. # Each module defines a C<handle_config> method and # calls L<register|"register"> during initialization. # # This module parses I<@ARGV> at initialization time. It removes any # arguments which are destined for this module. # # Without an argument or with just I<@ARGV>, looks for the name of # a configuration file as follows: # # # 1. # # If running setuid or setgid, skip to step 3. # # 2. # # If the environment variable I<$BCONF> is defined, # identifies the name of the configuration file which # must contain a hash. # # 3. # # The file F</etc/bivio.bconf> must exist and contain a hash. # # # If none of the files are found or they do not contain a hash, throws an # exception. # # If I<argv> is supplied and not running setuid or setgid (but may be # running as root), extracts (i.e. deletes) arguments from the # I<argv> of the form: # # --(Module.)param=value # # and sets configuration of the form: # # Module->{param} = value; # # I<param> may be of the form I<idx1.idx2.idx3> which translates to: # # Module->{idx1}->{idx2}->{idx3} = value; # # An error during evaluation causes program termination. To set a # value to undef, use the word C<undef>. # # Module defaults to C<main> if not supplied on the command line. # # This modules observes the lone B<--> convention, i.e. # parsing stops if a B<--> is encountered in the command line arguments. # # HACK: Since it is fairly common, the option I<--trace> is translated # to I<--Bivio::IO::Trace.filter> for brevity. # # NOTE: I<Module> and I<param> must contain only word characters (except # for C<::> and C<.> separators) for this syntax to work. # # If a valid configuration is found, calls packages which have # called L<register|"register">. # # # # bconf_file : string (not settable) # # This value appears in the config for Bivio::IO::Config. It is only visible # through tracing. # # trace : boolean [0] # # If true, every time the configuration changes, print all config to STDERR. Of # note is Bivio::IO::Config.bconf_file, if you are trying to debug where your # configuration is coming from. Here's how to pass it from the command line: # # my-program --trace=config # # May also be set in the config file itself. # # $BCONF # # Name of configuration file if not running setuid or setgid. # # /etc/bivio.bconf # # Name of configuration used if the program is running setuid or setgid # or the file identified by C<$BCONF> (or its default) is not found. #=VARIABLES my($_PKG) = __PACKAGE__; my($_BCONF) = undef; my($_BCONF_DIR) = undef; # The configuration read off disk or passed in my($_ACTUAL) = {}; my($_COMMAND_LINE_ARGS) = []; # List of packages registered my(@_REGISTERED) = (); # Configuration specifications for registered packages my(%_SPEC) = (); # Has a package been configured? my(%_CONFIGURED) = (); _initialize(@main::ARGV ? \@main::ARGV : []); __PACKAGE__->register(my $_CFG = { is_production => 0, is_dev => 0, }); sub DEFAULT_NAME { return ''; } sub NAMED { # Identifies the named configuration specification, see L<register|"register">. return \&NAMED; } sub REQUIRED { # Returns a unique value which passed in spec (see L<get|"get">) # will indicate the configuration parameter is required. return \&REQUIRED; } sub assert_dev { my($proto) = @_; die('may only be run on dev') unless $proto->is_dev; return; } sub assert_test { my($proto) = @_; die('may not be run on production') if $proto->is_production; return; } sub assert_version { my($proto, $version) = @_; $proto->if_version($version, 1, sub { die($version, "must be IO.Config $version or greater "); }); return; } sub bconf_dir_hashes { my($proto) = @_; return unless $_BCONF_DIR && -r $_BCONF_DIR && -d $_BCONF_DIR; my($dir) = $_BCONF_DIR; my($bconf) = $proto->bconf_file; $bconf = $1 if $bconf =~ /^(\w+)::.+->/; my($only) = "$dir/" . File::Basename::basename($bconf, '.bconf') . '-only.bconf'; return map( { my($file) = $_; my($data) = do($file) || die("$file: $@"); die($file, ': did not return a hash_ref') unless ref($data) eq 'HASH'; $data; } $only && -r $only ? $only : (), sort(grep(!/-only.bconf$/, glob("$dir/*.bconf"))), ); } sub bconf_file { # Do not rely on this being an actual file return $_BCONF; } sub bootstrap_package_dir { my(undef, $proto) = @_; return (_class_to_file(ref($proto) || $proto) =~ m{(.+)/.+?.pm$})[0]; } sub command_line_args { # Returns command line arguments, which were stripped from @ARGV return [@$_COMMAND_LINE_ARGS]; } sub get { my($proto, $name) = @_; my($res) = shift->unsafe_get(@_); unless (defined($res)) { _die($name, ': named config not found'); return {}; } return $res; } sub handle_config { my(undef, $cfg) = @_; $_CFG = $cfg; return; } sub if_version { my($proto, @cond) = @_; push(@cond, 1) if @cond == 1 && !ref($cond[0]); my($else) = @cond % 2 ? pop(@cond) : sub {}; my($version) = $_ACTUAL->{$_PKG}->{version} || 0; while (@cond) { my($cond_version, $op) = splice(@cond, 0, 2); return ref($op) eq 'CODE' ? $op->() : $op if $version >= $cond_version; } return ref($else) eq 'CODE' ? $else->() : $else; } sub introduce_values { my($proto, $new_values) = @_; # Adds I<new_values> to the running programs configuration. This routine should # be called sparingly. There's no guarantee running programs can handle dynamic # reconfiguration. L<handle_config|"handle_config"> will be called. # # Typical usage: # # BEGIN { # use Bivio::IO::Config; # Bivio::IO::Config->introduce_values({ # value1 => ..., # }); # } # # The earlier in the program's initialization process this is executed, the less # likely it is to cause problems. #TODO: Named config defaults don't get filled in die('new_values must be a hash_ref') unless ref($new_values) eq 'HASH'; $_ACTUAL = $proto->merge($new_values, $_ACTUAL); _actual_changed(); return; } sub is_dev { return $_CFG->{is_dev} ? 1 : 0; } sub is_production { return $_CFG->{is_production}; } sub is_test { return $_CFG->{is_production} ? 0 : 1; } sub merge { my($proto, $custom, $defaults, $merge_arrays) = @_; # Creates a new hash_ref by copying I<custom> values int a I<default> # configuration. Most applications have a common set of configuration which they # should define in a perl module. Development, test, and production # configurations can then be customized more easily without having to edit lots # of files. # # For example, your I<bconf> file might be defined as follows: # # # # # My development configuration # # # use strict; # use OurSite::BConf; # OurSite::BConf->merge({ # 'Bivio::UI::Facade' => { # http_host => 'myhost.oursite.com:8888', # mail_host => 'myhost.oursite.com', # }, # 'Bivio::UI::FacadeComponent' => { # die_on_error => 1, # }, # }); # # The class I<OurSite::BConf> might contain the standard production # configuration, which will be overridden by the custom configuration above: # # sub merge { # my($proto, $custom) = @_; # return Bivio::IO::Config->merge($custom, { # 'Bivio::UI::FacadeComponent' => { # # Production systems don't die if can't find component values, # # just return "undef" configuration. # die_on_error => 0, # }, # 'Bivio::UI::Facade' => { # http_host => 'www.oursite.com', # mail_host => 'oursite.com', # }, # 'Bivio::Die' => { # stack_trace_error => 1, # }, # 'Bivio::IO::ClassLoader' => { # delegates => { # 'Bivio::Agent::TaskId' => 'OurSite::Agent::TaskId', # 'Bivio::Agent::HTTP::Cookie' => 'OurSite::Agent::Cookie', # 'Bivio::TypeError' => 'OurSite::TypeError', # }, # maps => { # Model => ['OurSite::Model', 'Bivio::Biz::Model'], # Type => ['OurSite::Type', 'Bivio::Type'], # HTMLWidget => ['Bivio::UI::HTML::Widget', 'Bivio::UI::Widget'], # HTMLFormat => ['Bivio::UI::HTML::Format'], # MailWidget => ['Bivio::UI::Mail::Widget', 'Bivio::UI::Widget'], # FacadeComponent => ['OurSite::UI', 'Bivio::UI'], # Facade => ['OurSite::UI::Facade'], # Action => ['OurSite::Action', 'Bivio::Biz::Action'], # }, # }, # }); # } # # If I<merge_arrays> is true, then arrays in I<defaults> will be with # arrays in I<custom>. Most commonly used for maps, e.g., # # merge({ # maps => { # Model => ['OurSite:Model'], # }, # }, # }, { # maps => { # Model => ['Bivio::Biz::Model'], # }, # }, # }, # 1, # ); # # yields: # # { # maps => { # Model => ['OurSite:Model', 'Bivio::Biz::Model'], # }, # }; # Make a copy, so we don't modify original values in defaults my($result) = {%$defaults}; while (my($key, $value) = each(%$custom)) { $result->{$key} = ref($result->{$key}) eq ref($value) ? ref($value) eq 'HASH' ? $proto->merge($value, $result->{$key}, $merge_arrays) : ref($value) eq 'ARRAY' && $merge_arrays ? [@$value, @{$result->{$key}}] : $value : $value; } return $result; } sub merge_list { my($proto, @cfg) = @_; # Returns a merge by applying any number of I<custom> values to I<defaults>. # Calls L<merge|"merge"> from right to left. my($res) = {}; foreach my $c (reverse(@cfg)) { $res = $proto->merge($c, $res); } return $res; } sub register { my($proto, $spec) = @_; # Calling package will be put in the list of packages to be configured. A # callback to L<handle_config|"handle_config"> will happen # during the call to this method. # # The calling package must define a L<handle_config|"handle_config"> method which # takes two arguments, the class and the configuration as a hash. # # If I<spec> is supplied, the values will be filled in when L<get|"get"> is # called or the values are upcalled to L<handle_config|"handle_config">. # # A configuration I<spec> looks like: # # { # 'my_optional_param' => 35, # 'my_required_param' => Bivio::IO::Config->REQUIRED, # Bivio::IO::Config->NAMED => { # 'my_named_optional_param' => 'hello', # 'my_named_required_param' => Bivio::IO::Config->REQUIRED, # } # } # # Named configuration allows the package's configuration to be separately # named. For example, you might have several named databases you want # to configure. Named configuration is initialized from three locations: # # # * # # A specifically named configuration section, e.g. C<my_server>. # # * # # The parameters found in the (unnamed) common part of the configuration # using the names found in the L<NAMED|"NAMED"> part of the specification. # # * # # Lastly, the default values specified in the L<NAMED|"NAMED"> specification. # # # All configuration names must be fully specified. my($pkg) = caller; die("&$pkg\::handle_config not defined") unless defined(&{$pkg . '::handle_config'}); push(@_REGISTERED, $pkg); $_SPEC{$pkg} = $spec; &{\&{$pkg . '::handle_config'}}($pkg, _get_pkg($pkg)); return; } sub unsafe_get { my($proto, $name) = @_; # Looks up configuration for the caller's package (default). If name is # provided, returns the configuration hash bound to I<name> within the package's # configuration space, e.g. given the config: # # 'Bivio::IPC::Server' => { # 'listen' => 35, # 'my_server' => { # 'port' => 1234, # 'timeout' => 60_000, # }, # 'my_other_server' => { # 'port' => 9999, # }, # } # # C<get('my_server')> will return the following hash: # # { # 'listen' => 35, # 'port' => 1234, # 'timeout' => 60_000, # } # # Required configuration is checked during this call. # # If I<name> is passed but is undefined, then only the named configuration # parameters will be returned. # # If I<name> is not passed, then the entire configuration will be returned, # including specific named sections. # # If I<name> is prefixed by a package separated by a '.', then the # config for that element of that package is returned. my($pkg); if (($name || '') =~ /^([\w:]+)\.(\w+)$/) { $pkg = $1; $name = $2; } elsif (($name || '') =~ /::/) { $pkg = $name; $name = undef; pop(@_); } else { my($i) = 0; 0 while ($pkg = caller($i++)) eq __PACKAGE__; $name = undef unless defined($name) && length($name); } my($pkg_cfg) = _get_pkg($pkg); return $pkg_cfg if @_ < 2; my($spec) = $_SPEC{$pkg}; die("$pkg: NAMED config not specified by this package. You can't retrieve values from a config hash with get(). Only for named configuration or whole package") unless defined($spec) && defined($spec->{$proto->NAMED}); return defined($pkg_cfg->{$name}) ? $pkg_cfg->{$name} : undef if defined($name); # Retrieve the "undef" config, see _get_pkg my($cfg) = $pkg_cfg->{$proto->NAMED}; my(@bad) = grep( defined($cfg->{$_}) && $cfg->{$_} eq $proto->REQUIRED, keys(%$cfg), ); _die("$pkg.(" . join(' ', sort(@bad)), '): named config required') if @bad; return $cfg; } sub _actual_changed { # Call handlers and dump config, if debug option set. eval(q{ use Data::Dumper; my($dd) = Data::Dumper->new([$_ACTUAL]); $dd->Indent(1); $dd->Terse(1); $dd->Deepcopy(1); print(STDERR "Configuration is: ", $dd->Dumpxs(), "\n"); }) if $_ACTUAL->{$_PKG}->{trace}; foreach my $pkg (@_REGISTERED) { &{\&{$pkg . '::handle_config'}}($pkg, _get_pkg($pkg)); } return; } sub _class_to_file { my($class) = @_; return $INC{ join( '/', split(/::/, $class), ) . '.pm' } || die("$class: package not in \$INC\n"); } sub _die { die(@_) unless $_ACTUAL->{$_PKG}->{ignore_errors}; warn(@_); return; } sub _get_pkg { my($pkg) = @_; # Returns the config for pkg $_CONFIGURED{$pkg} && return $_ACTUAL->{$pkg}; my($actual) = ref($_ACTUAL->{$pkg}) ? $_ACTUAL->{$pkg} : {}; if ($_SPEC{$pkg}) { # Set the defaults for the common configuration my($spec) = $_SPEC{$pkg}; while (my($k, $v) = each(%$spec)) { # If it is required, then it is an error if (defined($v) && $v eq __PACKAGE__->REQUIRED) { _die("$pkg.$k: config parameter not defined.") unless defined($actual->{$k}); next; } # Have an actual value for specified config? exists($actual->{$k}) && next; # Is the named spec? $k eq &NAMED && next; # Assign the default value $actual->{$k} = $v; } # Set the defaults for all named configuration if (defined($spec->{__PACKAGE__->NAMED})) { my($named_spec) = $spec->{__PACKAGE__->NAMED}; # Fill in the actual for the "undef" case of &get my($undef_cfg) = {%$named_spec}; while (my($k, $v) = each(%$actual)) { # Does a spec exist for this param? exists($spec->{$k}) && next; # Does a named spec exist for this param? if (exists($named_spec->{$k})) { # Override named default with actual config $undef_cfg->{$k} = $v; next; } # Must be a named configuration section my($named_actual) = $v; unless (ref($named_actual)) { _die("$pkg.$k: invalid config parameter"); $named_actual = {}; } while (my($nk, $nv) = each(%$named_spec)) { # If it is required, then must be defined (not just exists) if (defined($nv) && $nv eq &REQUIRED) { # Defined in named section? defined($named_actual->{$nk}) && next; # Defined in common section? if (defined($actual->{$nk})) { $named_actual->{$nk} = $actual->{$nk}; next; } _die("$pkg.$nk: named config parameter not defined"); } else { # Have an actual value for specified named config? exists($named_actual->{$nk}) && next; } # Have an actual value in the common area? if (exists($actual->{$nk})) { $named_actual->{$nk} = $actual->{$nk}; next; } # Assign the default value (not found in either section) $named_actual->{$nk} = $nv; } } # Overload the use of "NAMED" to mean undef named cfg # in actual configuration. $actual->{&NAMED} = $undef_cfg; } } $_CONFIGURED{$pkg} = 1; return $_ACTUAL->{$pkg} = $actual; } sub _initialize { my($argv) = @_; # Initializes the configuration from I<config> hash. %_CONFIGURED = (); # On failure, we have no configuration. $_ACTUAL = undef; my($is_setuid) = !($< == $> && $( == $)); # If we are setuid or setgid, then don't _initialize from # environment variables or files in the current directory. # /etc/bivio.bconf is last resort if the file doesn't exist. $_BCONF = $ENV{BCONF}; if ($is_setuid && defined($_BCONF)) { warn("$ENV{BCONF}: ignoring \$BCONF while running setuid\n"); $ENV{BCONF} = $_BCONF = undef; } my($bconf_exists) = sub { my($bconf) = @_; return defined($bconf) && -f $bconf && -r $bconf ? $bconf : undef; }; $_BCONF ||= $bconf_exists->('/etc/bivio.bconf'); if (!$_BCONF || $ENV{BIVIO_HTTPD_PORT} && $_BCONF =~ /(?:\:\:|^[A-Z]\w+$)/ ) { $_BCONF ||= 'Bivio::DefaultBConf->merge'; $_BCONF .= '::BConf' unless $_BCONF =~ /BConf(?:$|\-\>)/; my($class) = $_BCONF =~ /(.+::\w+)/; $_BCONF .= '->dev' unless $_BCONF =~ /\-\>/; # $_BCONF_DIR must be set to something $_BCONF_DIR = $is_setuid ? () : (grep( -d $_, "$ENV{HOME}/bconf.d", "$ENV{HOME}/bconf/bconf.d", ))[0]; eval(" use $class; \$_ACTUAL = $_BCONF; "); } elsif ($bconf_exists->($_BCONF)) { $_BCONF = File::Spec->rel2abs($_BCONF); $_BCONF_DIR = File::Spec->catfile( File::Basename::dirname($_BCONF), 'bconf.d', ); $_ACTUAL = do($_BCONF); } die($_BCONF || '<undef>', ': bconf error: ', $@ || 'Must return hash ref') unless ref($_ACTUAL) eq 'HASH'; ($_ACTUAL->{$_PKG} ||= {})->{bconf_file} = $_BCONF; # Only process arguments in not_setuid case _process_argv($_ACTUAL, $argv) unless $is_setuid; _actual_changed(); return; } sub _process_argv { my($actual, $argv) = @_; # Inserts applicable command line arguments in $argv to $actual. for (my($i) = 0; $i < int(@$argv); $i++) { my($a) = $argv->[$i]; # Lone '--' means we're done $a =~ /^--$/s && last; # HACK: Probably want to generalize(?) $a =~ s/^--(?:TRACE|trace)=/--Bivio::IO::Trace.command_line_arg=/s; # Matches our form? (my($m, $p, $v) = $a =~ /^--([\w:]+)([\.\w]+)*=(.*)$/s) || next; # Need to default to package main? # (Convention: packages begin with upper-case letter) if ($m =~ /^[a-z0-9_]+$/ && $m ne 'main') { $p = defined($p) ? ($m . $p) : $m; $m = 'main'; } else { # Kill leading '.' substr($p, 0, 1) = ''; } $v eq 'undef' && ($v = undef); # Ensure the hashes exist down the chain, starting at the module ($m) # perl in Lispish my($ref, $car, $cdr) = ($actual, $m, $p); while (defined($cdr) && length($cdr)) { exists($ref->{$car}) || ($ref->{$car} = {}); $ref = $ref->{$car}; ($car, $cdr) = split(/\./, $cdr, 2); } $ref->{$car} = $v; # Get rid of processed parameter push(@$_COMMAND_LINE_ARGS, splice(@$argv, $i--, 1)); } ($actual->{'Bivio::IO::Config'} ||= {})->{trace} = 1 if (($actual->{'Bivio::IO::Trace'} || {})->{command_line_arg} || '') eq 'config'; return; } 1;