webperl/Utils.pm

532 lines
18 KiB
Perl

## @file
# System-wide utility functions. The functions in this file may be useful at
# any point throughout the system, so they are collected here to prevent the
# need for multiple copies around various modules.
#
# @author Chris Page <chris@starforge.co.uk>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
## @class
# System-wide utility functions. The functions in this file may be useful at
# any point throughout the system, so they are collected here to prevent the
# need for multiple copies around various modules.
package Utils;
require Exporter;
use File::Spec;
use File::Path;
use POSIX qw(strftime);
use strict;
our @ISA = qw(Exporter);
our @EXPORT = qw();
our @EXPORT_OK = qw(path_join resolve_path check_directory load_file save_file superchomp trimspace lead_zero string_in_array blind_untaint title_case sentence_case is_defined_numeric rfc822_date get_proc_size find_bin untaint_path read_pid write_pid hash_or_hashref);
# ============================================================================
# File and path related functions
## @fn $ path_join(@fragments)
# Take an array of path fragments and concatenate them together. This will
# concatenate the list of path fragments provided using '/' as the path
# delimiter (this is not as platform specific as might be imagined: windows
# will accept / delimited paths). The resuling string is trimmed so that it
# <b>does not</b> end in /, but nothing is done to ensure that the string
# returned actually contains a valid path.
#
# @param fragments The path fragments to join together.
# @return A string containing the path fragments joined with forward slashes.
sub path_join {
my @fragments = @_;
my $leadslash;
# strip leading and trailing slashes from fragments
my @parts;
foreach my $bit (@fragments) {
# Skip empty fragments.
next unless($bit);
# Determine whether the first real path has a leading slash.
$leadslash = $bit =~ m|^/| unless(defined($leadslash));
# Remove leading and trailing slashes
$bit =~ s|^/*||; $bit =~ s|/*$||;
# If the fragment was nothing more than slashes, ignore it
next unless($bit);
# Store for joining
push(@parts, $bit);
}
# Join the path, possibly including a leading slash if needed
return ($leadslash ? "/" : "").join("/", @parts);
}
## @fn $ resolve_path($path)
# Convert a relative (or partially relative) file into a truly absolute path.
# for example, /foo/bar/../wibble/ptang becomes /foo/wibble/ptang and
# /foo/bar/./wibble/ptang becomes /foo/bar/wibble/ptang
#
# @param path The path to convert to an absolute path
# @return The processed absolute path.
sub resolve_path {
my $path = shift;
# make sure the path is absolute to begin with
$path = File::Spec -> rel2abs($path) if($path !~ /^\//);
my ($vol, $dirs, $file) = File::Spec -> splitpath($path);
my @dirs = File::Spec -> splitdir($dirs);
my $i = 0;
# loop through all the directories removing relative and current entries.
while($i < scalar(@dirs)) {
# each time a '..' is encountered, remove it and the preceeding entry from the array.
if($dirs[$i] eq "..") {
die "Attempt to normalise a relative path!" if($i == 0);
splice(@dirs, ($i - 1), 2);
$i -= 1; # move back one level to account for the removal of the preceeding entry.
# single '.'s - current dir - can just be stripped without touching previous entries
} elsif($dirs[$i] eq ".") {
die "Attempt to normalise a relative path!" if($i == 0);
splice(@dirs, $i, 1);
# do not update $i at this point - it will be looking at the directory after the . now.
} else {
++$i;
}
}
return File::Spec -> catpath($vol, File::Spec -> catdir(@dirs), $file);
}
## @fn void check_directory($dirname, $title, $options)
# Apply a number of checks to the specified directory. This will check
# various attribues of the specified directory and if any of the checks
# fail, this will die with an appropriate message. If all the checks pass,
# this will return silently. The optional options hash controls which
# checks are performed on the directory:
#
# exists If true, the specified directory must exist. If false, the
# existence of the directory is not enforced. If not specified,
# this check defaults to true.
# nolink If true, the directory must be a real, physical directory, it
# must not be a shambolic link. If false, it can be either. If not
# specified, this defaults to false (don't check).
# checkdir If true, verify that the directory is actually a directory and
# not a file or other special directory entry. If false, don't
# bother checking. If not specified, this defaults to true.
#
# @note If 'checkdir' is set to true, the function will die with a fatal
# error if the directory does not exist even if 'exists' is false.
# @param dirname The directory to check
# @param title A human-readable description of the directory.
# @param options A reference to a hash of options controlling the checks.
sub check_directory {
my $dirname = shift;
my $title = shift;
my $options = shift;
$options -> {"exists"} = 1 if(!defined($options -> {"exists"}));
$options -> {"nolink"} = 0 if(!defined($options -> {"nolink"}));
$options -> {"checkdir"} = 1 if(!defined($options -> {"checkdir"}));
die "FATAL: The specified $title does not exist.\n"
unless(!$options -> {"exists"} || -e $dirname);
die "FATAL: The specified $title is a link, please only use real directories.\n"
if($options -> {"nolink"} && -l $dirname);
die "FATAL: The specified $title is not a directory.\n"
unless(!$options -> {"checkdir"} || -d $dirname);
}
## @fn $ load_file($name)
# Load the contents of the specified file into memory. This will attempt to
# open the specified file and read the contents into a string. This should be
# used for all file reads whenever possible to ensure there are no internal
# problems with UTF-8 encoding screwups.
#
# @param name The name of the file to load into memory.
# @return The string containing the file contents, or undef on error. If this
# returns undef, $! should contain the reason why.
sub load_file {
my $name = shift;
if(open(INFILE, "<:utf8", $name)) {
undef $/;
my $lines = <INFILE>;
$/ = "\n";
close(INFILE)
or return undef;
return $lines;
}
return undef;
}
## @fn $ save_file($name, $data)
# Save the specified string into a file. This will attempt to open the specified
# file and write the string in the second argument into it, and the file will be
# truncated before writing. This should be used for all file saves whenever
# possible to ensure there are no internal problems with UTF-8 encoding screwups.
#
# @param name The name of the file to load into memory.
# @param data The string, or string reference, to save into the file.
# @return undef on success, otherwise this dies with an error message.
# @note This function assumes that the data passed in the second argument is a string,
# and it does not do any binmode shenanigans on the file. Expect it to break if
# you pass it any kind of binary data, or use this on Windows.
sub save_file {
my $name = shift;
my $data = shift;
if(open(OUTFILE, ">:utf8", $name)) {
print OUTFILE ref($data) ? ${$data} : $data;
close(OUTFILE)
or die "FATAL: Unable to close $name after write: $!\n";
return undef;
}
die "FATAL: Unable to open $name for writing: $!\n";
}
# ============================================================================
# String modification functions
## @fn void superchomp($line)
# Remove any white space or newlines from the end of the specified line. This
# performs a similar task to chomp(), except that it will remove <i>any</i> OS
# newline from the line (unix, dos, or mac newlines) regardless of the OS it
# is running on. It does not remove unicode newlines (U0085, U2028, U2029 etc)
# because they are made of spiders.
#
# @param line A reference to the line to remove any newline from.
sub superchomp(\$) {
my $line = shift;
$$line =~ s/(?:[\s\x{0d}\x{0a}\x{0c}]+)$//o;
}
## @fn $ trimspace($data)
# Remove whitespace from the start and end of the specified string, and
# return the stripped string.
#
# @param data The string to remove leading and trailing whitespace from.
# @return The stripped string.
sub trimspace {
my $data = shift;
$data =~ s/^[\s\x{0d}\x{0a}\x{0c}]+//o;
$data =~ s/[\s\x{0d}\x{0a}\x{0c}]+$//o;
return $data;
}
## @fn $ lead_zero($value)
# Ensure that the specified value starts with 0 if it is less than 10
# and does not already start wiht 0 (so '9' will become '09' but '15'
# will not be altered, nor will '05').
#
# @param value The value to check
# @return The value with a lead 0 if it does not have one already and needs it.
sub lead_zero {
my $value = shift;
return "0$value" if($value < 10 && $value !~ /^0/);
return $value;
}
## @fn $ string_in_array($arrayref, $value)
# Determine whether the specified value exists in an array. This does a simple
# interative serach over the array to determine whether value is present in the
# array.
#
# @param arrayref A reference to the array to search.
# @param value The value to search for in the array.
# @return The index of the value on success, undef if the value is not in the array.
sub string_in_array {
my $arrayref = shift;
my $value = shift;
# can't be in an undefined list by definition.
return undef if(!$arrayref);
my $size = scalar(@{$arrayref});
for(my $pos = 0; $pos < $size; ++$pos) {
return $pos if($arrayref -> [$pos] eq $value);
}
return undef;
}
## @fn $ blind_untaint($str)
# Untaint the specified string blindly. This should generally only be used in
# situations where the string is guaranteed to be safe, it just needs to be
# untainted.
#
# @param str The string to untaint
# @return The untainted string
sub blind_untaint {
my $str = shift;
my ($untainted) = $str =~ /^(.*)$/;
return $untainted;
}
## @fn void title_case($strref, $punc_border)
# Convert the words in the provided string to titlecase. This will process all the
# words in the string referred to by the argument into titlecase, to avoid situations
# where allcaps/alllower input has been provided for a string that does not look
# good that way.
#
# @param strref A reference to the string to convert.
# @param punc_border If true, punctuation is treated as boundary character, otherwise
# only the start or end of the string or space is treated as a
# word boundary.
sub title_case(\$$) {
my $strref = shift;
my $punc_border = shift;
if($punc_border) {
$$strref =~ s/\b(.*?)\b/ucfirst(lc($1))/ge;
} else {
$$strref =~ s/(^|\s)((?:\S|\z)+)/$1.ucfirst(lc($2))/gem;
}
# Fix up entities
$$strref =~ s/(&[a-z]+;)/lc($1)/ge;
}
## @fn void sentence_case($strref)
# Convert the words in the provided string to sentence case. This will process all the
# words in the string referred to by the argument to convert the string to sentence case,
# to avoid situations where allcaps/alllower input has been provided for a string that
# does not look good that way.
#
# @param strref A reference to the string to convert.
sub sentence_case(\$) {
my $strref = shift;
$$strref = ucfirst(lc($$strref));
}
# ============================================================================
# CGI Convenience functions
## @fn $ is_defined_numeric($cgi, $param)
# Determine whether the specified cgi parameter is purely numeric and return it
# if it is. If the named parameter is not entirely numeric, this returns undef.
#
# @param cgi The cgi handle to check the parameter through.
# @param param The name of the cgi parameter to check.
# @return The numeric value in the parameter, or undef if it is not purely numeric.
sub is_defined_numeric {
my ($cgi, $param) = @_;
if(defined($cgi -> param($param)) && $cgi -> param($param) !~ /\D/) {
return $cgi -> param($param);
}
return undef;
}
# ============================================================================
# Miscellaneous functions
## @fn $ rfc822_date($timestamp)
# Convert a unix timestamp into a rfc822-formatted date string. This is guaranteed
# to generate a RFC822 date string (unlike strftime, which could generate week and
# month names in another language in other locales)
#
# @param timestamp The unix timestamp to convert to rfc822 format
# @return The rfc822 time string
sub rfc822_date {
my $timestamp = shift;
# set up constants we'll need
my @days = ("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat");
my @mons = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec");
my @ts = localtime($timestamp);
return sprintf("%s, %02d %s %4d %02d:%02d:%02d %s",
$days[$ts[6]], $ts[3], $mons[$ts[4]], $ts[5] + 1900,
$ts[2], $ts[1], $ts[0],
strftime("%Z", @ts));
}
## @method $ hash_or_hashref(@args)
# Given a list of arguments, if the first argument is a hashref it is returned,
# otherwise if the list length is nonzero and even, the arguments are shoved
# into a hash and a reference to that is returned. If the argument list is
# empty or its length is odd, and empty hashref is returned.
#
# @param args A list of arguments, may either be a hashref or a list of key/value
# pairs to place into a hash.
# @return A hashref.
sub hash_or_hashref {
my $len = scalar(@_);
return {} unless($len);
# Even number of args? Shove them into a hash and get a ref
if($len % 2 == 0) {
return { @_ };
# First arg is a hashref? Return it
} elsif(ref($_[0]) eq "HASH") {
return $_[0];
}
# No idea what to do, so give up.
return {};
}
# ============================================================================
# OS specific functions
## @fn $ get_proc_size()
# Determine how much memory the current process is using. This examines the process'
# entry in proc, it's not portable, but frankly I don't care less about that.
#
# @return The process virtual size, in bytes, or -1 if it can not be determined.
sub get_proc_size {
# We don't need no steenking newlines
my $nl = $/;
undef $/;
# Try to open and read the process' stat file
open(STAT, "/proc/$$/stat")
or die "Unable to read stat file for current process ($$)\n";
my $stat = <STAT>;
close(STAT);
# Now we need to pull out the vsize field
my ($vsize) = $stat =~ /^[-\d]+ \(.*?\) \w+ [-\d]+ [-\d]+ [-\d]+ [-\d]+ [-\d]+ [-\d]+ [-\d]+ [-\d]+ [-\d]+ [-\d]+ [-\d]+ [-\d]+ [-\d]+ [-\d]+ [-\d]+ [-\d]+ [-\d]+ [-\d]+ [-\d]+ ([-\d]+)/;
return $vsize || -1;
}
## @fn $ find_bin($name, $search)
# Attempt to locate the named binary file on the filesystem. This will search several
# standard paths for the named binary (much like the shell will search its path,
# except that this is not subject to environment pollution), and if it is located the
# full path is returned.
#
# @param name The name of the binary to locate.
# @param search An optional reference to an array of locations to look in for the
# binary. Defaults to ['/usr/bin', '/bin', '/opt/bin', '/usr/local/bin']
# Paths are searched first to last, and the path of the first matching
# binary user can execute is used.
# @return A string containing the path of the binary, or undef on error.
sub find_bin {
my $name = shift;
my $search = shift || ['/usr/bin', '/bin', '/opt/bin', '/usr/local/bin'];
foreach my $path (@{$search}) {
my $check = path_join($path, $name);
return $check if(-f $check && -x $check);
}
return undef;
}
## @fn $ untaint_path($taintedpath)
# Untaint the path provided. This will attempt to pull a valid path out of
# the specified tainted path - note that this is rather stricter about
# path contents than strictly necessary, and it will only allow alphanumerics,
# /, . and - in paths.
#
# @param taintedpath The tainted path to untaint.
# @return The untainted path, or undef if the path can not be untainted.
sub untaint_path {
my $taintedpath = shift;
my ($untainted) = $taintedpath =~ m|^(/?(?:[-\w.]+)(?:/[-\w.]+)*)$|;
return $untainted;
}
# ============================================================================
# PID storage and retieval
## @fn void write_pid($filename)
# Write the process id of the current process to the specified file. This will
# attempt to open the specified file and write the current processes' ID to
# it for use by other processes.
#
# @param filename The name of the file to write the process ID to.
sub write_pid {
my $filename = shift;
open(PIDFILE, "> $filename")
or die "FATAL: Unable to open PID file for writing: $!\n";
print PIDFILE $$;
close(PIDFILE);
}
## @fn $ read_pid($filename)
# Attempt to read a PID from the specified file. This will read the file, if possible,
# and verify that the content is a single string of digits.
#
# @param filename The name of the file to read the process ID from.
# @return The process ID. This function will die on error.
sub read_pid {
my $filename = shift;
open(PIDFILE, "< $filename")
or die "FATAL: Unable to open PID file for reading: $!\n";
my $pid = <PIDFILE>;
close(PIDFILE);
chomp($pid); # should not be needed, but best to be safe.
my ($realpid) = $pid =~ /^(\d+)$/;
die "FATAL: PID file does not appear to contain a valid process id.\n"
unless($realpid);
return $realpid;
}
1;