#!/usr/bin/perl -w

#
# $Id: PmHSM.pl,v 0.6 2002/02/11 22:00:01 mjacobs Exp $
#
###############################################################################
#
# PmHSM - tool for creating CD/DVD-images from directory trees and replace
#         every file (HSM) or the base directory with symbolic link to
#         file/directory position in the exported image
# Copyright (C) 2002 Martin Jacobs
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###############################################################################
#
# ****************************
# ** PmHSM - Poor Man's HSM **
# ****************************
#
# Idea:		Store locally saved files transparently on an external media
#			like RW-CD-ROM or MO-discs (future option). mkisofs and cdrecord
#			are our companions to generate images and write them to CD/DVD
#			media.
#
#			Exported files will be replaced by symbolic links so they can
#			be accessed if the export media is mounted.
#
# Example	There is a file on the local file system
#
#			/home/username/download/ImageMagick-5.1.1.tar.gz
#
#			which will be replaced by a link to the external media
#
#			/cdrom/home/username/download/ImageMagick-5.1.1.tar.gz
#
# PmHSM's purpose is to move seldom used files to an external media. Usage is
# quite simple. PmHSM has several steps of operation:
#			- create an image;
#			- verify a mounted image with existing data;
#			- replace local data with links to external data;
# It's planned to extend PmHSM with mode ARCHIVE to replace a full directory
# tree with one link to it's external representation.
#
# PmHSM needs the following information:
#			- the prefix of the exported directory hierarchy, see the above
#			  example, there it is /cdrom/hsm;
#			- one or more directory paths which should be processed;
#			- for generating CD-Images a target file name;
#			- optional filter criteria (last file usage, include pattern,
#			  exclude pattern);
#
# ToDo:		- apropriate reaction to signals;
#			- more differenciated messages (warning, error, ...), skipping a 
#			  file or directory in HSM mode is harmless, but it's a reason to
#			  abort ARCHIVE mode!
#			- more user friendly front-end, maybe gui;
#			- add a session tag to be used as virtual root above the 
#			  mount-point, think of versioning, autoincrement number in hidden
#			  file (e.g .pmshm);
#			- remove longest common source path;
#			- optional do copying files instead of building an iso-image, this
#			  may be useful with MO-drives and media or UDF on CD-ROM/DVD;
#			- extend this tool for archieving purposes, add an image id;
#			- remove complete subtree (Albrecht) and replace the root of the 
#			  subtree with a symbolic links (seems to be the same as archieving);
#			- generate file list with image id;

require 5.004;		# This is the version under which this code was developed.

use strict;
use English;
use Getopt::Long;
use vars qw();
use Config;
use Cwd;					# We need cwd()
use File::Path;
use File::Basename;
use File::Compare;

#
# Callback routine as handler for non-option arguments which shall be
# regarded as directory source names and collected in the global array
# SrcDirs::main.
#
sub CollectDirnames
{
	my $source = shift;
	push @SrcDirs::main, $source;
}


#
# Scan through directories, push every file with it's full pathname
# onto the file stack, recurse into directories.
#
sub Scan {
	my ($rFiles, $rDirs, $dir, $Prefix, $notaccessedfor, $level) = @_;
	my @allentries = ();
	my $workingdir = cwd();		# Needed to make absolute source pathnames.
	my ($target, $tmp_source);

	if ($SigCount::main > 0) {
		$nRecursion::main--;
		return;
	}

	# open this directory and read it's entries.
	opendir MYDIR, $dir or return -1;
	@allentries = readdir MYDIR;
	closedir MYDIR;

	# Push the name of this directory onto the stack:
	if (-w $dir and -x $dir) {
		my $source = $dir;

		# Prepare the path name to which the symbolic link will
		# show on the external media.
		$target = $Prefix . (($Prefix =~ m/\/$/o) ? '' : '/');
		if ($IsDosWin::main) {
			# To catenate the pathnames we may have to strip off a leading
			# drive letter colon combination:
			$target .= ($source =~ s/^[a-zA-Z]\://o);
		} else {
			$target .= $source;
		}

		$tmp_source = $dir;
		if ($IsDosWin::main) {
			# Strip off a leading drive letter:
			$tmp_source = $tmp_source =~ s/^[a-zA-Z]\://io;
		}
		$tmp_source = $TargetDir::main . ($tmp_source =~ m+^/+ ? '' : '/') . $tmp_source; 
		$Summary::main{'scan_diradd'}++;
		push @$rDirs, {
			'stat' => [ stat($dir) ],
			source => $dir,
			target => $target,				# absolute path on external media.
			tmp_source => $tmp_source,
			level => $level,				# tree level relative to source dir.
			};
	} else {
		$Summary::main{'scan_skipdir'}++;
		print "Skip directory $dir.\n" if ($VerboseFlag::main);
		$nRecursion::main--;
		return;
	}
	
	# Now scan through the entries:
	foreach my $entry (@allentries) {
		if ($SigCount::main > 0) {
			last;
		}
		my $source = $dir . '/' . $entry;
		# ignore '.' and '..' entries;
		next if ($entry =~ m/^\.{1,2}$/o); 

		if (-d $source) {
			# Recurse into next hierarchy:
			$nRecursion::main++;
			Scan(\@$rFiles, \@$rDirs, $source, $Prefix, $notaccessedfor, $level + 1);
			next;
		}

		my $abssource = $source;
		# Does abssource start with workingdir? Then it's an absolute path.
		# This should be OS independend, hopefully.
		if (not $abssource =~ m/^$workingdir/) {
			# No, so this should be a relative path, make it absolute:
			$abssource = $workingdir . '/' . $source;
		}

		# Prepare the path name to which the symbolic link will
		# show on the external media.
		$target = $Prefix . (($Prefix =~ m/\/$/o) ? '' : '/');
		if ($IsDosWin::main) {
			# To catenate the pathnames we may have to strip off a leading
			# drive letter colon combination:
			$target .= ($source =~ s/^[a-zA-Z]\://o);
		} else {
			$target .= $source;
		}

		# Prepare the path name of the file on the shadow tree.
		$tmp_source = $source;
		if ($IsDosWin::main) {
			# Strip a leading drive letter:
			$tmp_source = $tmp_source =~ s/^[a-zA-Z]\://io;
		}
		$tmp_source = $TargetDir::main . ($tmp_source =~ m+^/+ ? '' : '/') . $tmp_source;

		# Process plain files and symbolic links which can be deleted:
		my @filestat = stat($source);
		my $fileage = -A _;
		my $IsPlainFile = -f _;
		my $IsSymLink = -l $source;
		if ($IsPlainFile) {
			# ignore access time stamp on exported file.
			if ($IsSymLink or $notaccessedfor <= $fileage) {
				push @$rFiles, {
					'stat' => [ @filestat ],	# Save the dir-info for later use.
					source => $source,			# absolute or relative path to source file.
					abssource => $abssource,	# absolute path to source file.
					target => $target,			# absolute path on external media.
					tmp_source => $tmp_source,	# absolute path on shadow tree.
					level => $level,			# tree level, relative to source dir.
					};
				$Summary::main{'scan_fileadd'}++;
				print "Add file  $source\n" if ($VerboseFlag::main);
				next;
			} else {
				# File does not satisfy last access condition.
				$Summary::main{'scan_skipage'}++;
				print "Skip file $source, age in days since last access is less than $notaccessedfor days.\n" if ($VerboseFlag::main);
				next;
			}
		}

		if ($IsSymLink) {
			$Summary::main{'scan_skipsym'}++;
			print "Skip sym link $source.\n" if ($VerboseFlag::main);
			next;
		}

		$Summary::main{'scan_skipother'}++;
		print "Skip $source, it's neither a plain file nor a directory.\n" if ($VerboseFlag::main);
	}
	$nRecursion::main--;
	return;
}



#
# Create the shadow tree, which contains symbolical links to the source
# files. This tree will be source for image generation.
#
sub CreateShadowTree {
	my ($rDirs, $rFiles) = @_;

	# First create the directory tree:
	foreach my $entry (@$rDirs) {
		if ($SigCount::main > 0) {
			return 2;
		}
		mkpath($entry->{tmp_source}, $VerboseFlag::main, $TempPermission::main) 
			or die "Failed to create directory $entry->{tmp_source}\n";
	}

	# Second create the symbolic links:
	foreach my $entry (@$rFiles) {
		if ($SigCount::main > 0) {
			return 2;
		}
		if (symlink($entry->{abssource}, $entry->{tmp_source}) == 0) {
			print "Error: failed to create symbolic link $entry->{tmp_source}\n";
			return 1;
		}
	}

	# Third restore time-, ownership and permissions.
	foreach my $entry (@$rDirs) {
		if ($SigCount::main > 0) {
			return 2;
		}
		my ($rc, $action);
		$rc = 1;
		if ($rc == 1) {
			$action = 'set time stamp of';
			$rc = utime $entry->{'stat'}[8], $entry->{'stat'}[9], $entry->{tmp_source} if ($rc == 1);
		}
		if ($rc == 1) {
			$action = 'set permissions of';
			$rc = chmod $entry->{'stat'}[2], $entry->{tmp_source} if ($rc == 1);
		}
		if ($rc == 1) {
			$action = 'change ownership of';
			$rc = chown $entry->{'stat'}[4], $entry->{'stat'}[5], $entry->{tmp_source};
		}
		if ($rc == 1) {
			next;
		}
			  
		print "Error: failed to $action directory $entry->{tmp_source} ($!)\n";
		return 1;
	}
	return 0;
}


#
# Create isofs:
#
sub GenIsoFile {
  my ($image, $rootdir) = (@_); 
  my @args;
  push @args, $MkisofsBin::main; 
  push @args, "-o$image";
  push @args, '-f';			# follow symlinks
  push @args, '-N';			# omit version numbers
  push @args, '-allow-multidot';
  push @args, '-allow-lowercase';
  push @args, '-D';			# Do not use deep directory relocation.
  push @args, '-l';			# Allow full 31 character file names.
  push @args, '-relaxed-filenames';
  push @args, '-R';			# Use Rockridge extensions.
  push @args, '-J';			# Generate Joliet extensions for Win9X/WinNT
  push @args, "$rootdir";
  return system(@args);
}


#
# Blank media
#
sub BlankMedia {
  my ($method) = @_; 
  my @args;
  push @args, $CdrecordBin::main;
  push @args, "-blank=$method";
  return system(@args);
}


#
# Burn image onto media
#
sub BurnImage {
	my ($source) = @_;
	my @args;
	push @args, $CdrecordBin::main;
	push @args, '-v' if ($VerboseFlag::main);
	push @args, $source;
	return system(@args);
}


#
# Do the Cleanup:
#
sub CleanUp {
	my ($RmImage) = @_;
	print "Step: Cleanup...\n";

	unlink $ImageFilename::main if ($RmImage);
	rmtree $TargetDir::main, ($VerboseFlag::main) ? 1 : 0, 0;
}


####################################################################
#
# Miscellaneous
#
sub CheckBinaries {
	my ($BinMkisofs, $BinCdrecord, $OptBurnIt, $OptBlankMethod) = @_;

	my ($FoundMkisofs, $FoundCdrecord) = (0, 0);
	if ($SkipCreateImage::main == 0 or $OptBurnIt or $OptBlankMethod) {
		# Get file search path:
		my @paths;
		if ($IsDosWin::main) {
			@paths = split /;/, $ENV{PATH};
		} else {
			@paths = split /:/, $ENV{PATH};
		}
		#
		# Loop through found paths:
		#
		foreach my $path (@paths) {
			stat($path . '/' . $BinMkisofs);
			if (-e _ and -x _) {
				# file exists and we can execute it.
				$FoundMkisofs = 1;
			}
			stat($path . '/' . $BinCdrecord);
			if (-e _ and -x _) {
				# file exists and we can execute it.
				$FoundCdrecord = 1;
			}
		}
	}

	die "Binary $BinMkisofs not found or no execute rights.\n" if ($SkipCreateImage::main == 0 and !$FoundMkisofs);
	die "Binary $BinCdrecord not found or no execute rights.\n" if (($OptBurnIt or $OptBlankMethod) and !$FoundCdrecord);
}


#
# Validate given directories.
#
sub CheckSourceDirs {
	my $rSrcDir = shift;

	my $ErrorsFound = 0;

	my $wdir = cwd();
	foreach my $thisdir (@{$rSrcDir}) {
		print " Checking path '$thisdir' ...\n" if ($VerboseFlag::main);
		# Is this a directory?
		if (! -d $thisdir) {
			$ErrorsFound++;
			print "  Argument '$thisdir' isn't a directory.\n";
			next;
		}
		# We do not allow relative paths down to root.
		if ($thisdir =~ m/\.\./o) {
			$ErrorsFound++;
			print "  Directory $thisdir refused: contains '..'\n";
			next;
		}
		# Try to chdir to the given directory:
		if (chdir $thisdir) {
			my $newwdir = cwd();
			# So far this seams to be ok. Now check, that this directory
			# is not a subpath of $wdir:
			if ($wdir =~ m/^$newwdir/) {
				$ErrorsFound++;
				print "  Directory '$thisdir' refused: is subpath of '$wdir'\n";
				next;
			}
		} else {
			$ErrorsFound++;
			print "  Can't chdir to '$thisdir', refused: reason '$!'\n";
		}
	}

	# Restore original working directory.
	chdir $wdir;

	return $ErrorsFound == 0;
}


####################################################################
#
# Signal handlers
#
sub catch_pipe {
	$SigPipe::main++;
	$SigCount::main++;
}


sub catch_term {
	$SigTerm::main++;
	$SigCount::main++
}


sub catch_int {
	$SigInt::main++;
	$SigCount::main++;
}


##################
# Das ist main() #
##################

defined $Config::Config{d_symlink} or die "Symbolic links needed.\n";

#