#!/usr/bin/env perl -w
#
# cirdr_status:  Find a pipeline by name using the pipeline list, and
#                determine its location and current status.
#

use strict;
use Getopt::Long;

use FindBin qw($Bin);
use lib $Bin;

use PipelineList ':all';
use PipelineStatus ':all';

### Main ###

# Extract the program name from $0; it prefixes every diagnostic below.
my $progname = basename($0);

# Extract command-line arguments.  Defaults: the list file named by
# DEFAULT_LIST, and HTML output (-n switches to the plain list format).
my $listfile = DEFAULT_LIST;
my $htmlmode = 1;

# Allow single-letter options to be bundled together.
Getopt::Long::Configure('bundling');

my $rv = GetOptions('n|nohtml' => sub { $htmlmode = 0; },
                    'l|list=s' => \$listfile);

# The output directory is mandatory, so at least one argument must remain
# once the options have been consumed.  The message uses $progname, like
# all the other diagnostics, and shows that -l takes an argument.
die "Usage:\t$progname [-l listfile] [-n] [pipeline] [...] outputdir\n"
    if(!$rv || @ARGV == 0);

# Get output directory: always the last argument.
my $outdir = pop @ARGV;

# Strip trailing slashes, but only when something other than a slash
# precedes them, so that an output directory of "/" is not reduced to the
# empty string.
$outdir =~ s|(?<=[^/])/+$||;

# Create the output directory if it does not exist yet (one level only).
unless(-d $outdir) {
    mkdir($outdir) || die "$progname: mkdir: $outdir: $!\n";
}

# Look up the pipelines on the command-line.  The requested names are
# lower-cased before being handed to read_list.
my @pipe_names = map { lc($_) } @ARGV;

# read_list receives a reference through which it can hand back an error
# message; an empty result is fatal whether or not a message was set.
my $errstr = undef;
my @pipe_list = read_list($listfile, \@pipe_names, \$errstr);

if(@pipe_list == 0) {
    if(defined $errstr) {
        die "$progname: list file $listfile: $errstr\n";
    }
    else {
        die "$progname: no pipelines found in list\n";
    }
}

# Stage names: used below as the column titles of the summary table.
my @snames = stage_names();

# Create output summary file: summary.html or summary.list in the output
# directory, depending on the output mode.  The three-argument form of
# open keeps the mode separate from the file name, so unusual characters
# in the directory name cannot be misread as part of the mode.
my $sumfn = sprintf("%s/summary.%s", $outdir, ($htmlmode ? 'html' : 'list'));
open(SUMFH, '>', $sumfn)
    or die "$progname: open: $sumfn: $!\n";

if($htmlmode) {
    # Document preamble.  The style sheet is emitted inside <head>, where
    # HTML 4.01 requires it to be.
    print SUMFH <<EOD;
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
        "http://www.w3.org/TR/html4/loose.dtd">
<html><head><title>WFCAM pipeline status</title>
<style type="text/css"><!--
  TH { background-color: #ccccff; text-align: center }
  TD { text-align: center }
  TD.notstart { background-color: #ffffff }
  TD.incomplete { background-color: #ffcccc }
  TD.complete { background-color: #ccffcc }
  TABLE { padding: 5 }
--></style>
</head>
<body bgcolor="#ffffff" text="#000000">
<p><center><h1>WFCAM pipeline status</h1></center></p>
<p><table border>
EOD

    # Table heading row: data-set name, one column per stage (stage name
    # with its first letter capitalised), then the overall status.
    print SUMFH "<tr><th>Data-set</th>";

    foreach my $sname (@snames) {
        print SUMFH "<th>", ucfirst($sname), "</th>";
    }

    print SUMFH "<th>Overall</th></tr>\n";
}

# Determine the status of each pipeline.  For every pipeline this writes a
# detailed per-file table to its own file in the output directory, and
# adds one row (HTML) or one line (list mode) to the overall summary.
foreach my $param (@pipe_list) {
    # Get files under the pipeline path.
    my($dir, $files) = find_fits($param->{path});
    unless(defined $dir && defined $files) {
        # find_fits gave us nothing to work with; report the pipeline as
        # not started and move on.  (Presumably this means no data were
        # found under the path -- confirm against find_fits.)
        if($htmlmode) {
            print SUMFH ("<tr><td>", $param->{name}, "</td>");
            printf SUMFH ("<td class=\"notstart\" colspan=\"%d\">Not started</td></tr>\n",
                          scalar(@snames) + 1);
        }
        else {
            printf SUMFH ("%s Not started\n", $param->{name});
        }
        next;
    }

    # Create summary file for pipeline.  Three-argument open keeps the
    # mode separate from the (externally supplied) file name.
    my $outfn = sprintf("%s.%s", $param->{name}, ($htmlmode ? 'html' : 'list'));
    open(OUTFH, '>', "$outdir/$outfn")
        or die "$progname: open: $outdir/$outfn: $!\n";

    if($htmlmode) {
        my $name = $param->{name};

        # Document preamble; the style sheet belongs inside <head>.
        print OUTFH <<EOD;
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
        "http://www.w3.org/TR/html4/loose.dtd">
<html><head><title>WFCAM pipeline status for $name</title>
<style type="text/css"><!--
  TH { background-color: #ccccff; text-align: center }
  TD { text-align: center }
  TD.notstart { background-color: #ffffff }
  TD.incomplete { background-color: #ffcccc }
  TD.complete { background-color: #ccffcc }
  TABLE { padding: 5 }
--></style>
</head>
<body bgcolor="#ffffff" text="#000000">
<p><center><h1>WFCAM pipeline status for $name</h1></center></p>
<p><font size="-1"><pre>
EOD
    }

    # Print header.  These two lines must stay aligned with the printf
    # format used for each file below; the trailing blanks on the second
    # line are part of the original layout and are kept.
    print OUTFH
        '                  --------Stage 1---------  Def  Sky  --Tartup--  WCS  ---Catalogues---   Sky%  Phot' . "\n",
        '                  Done Lin Reset Dark Flat            Done Reset  0 1  Done Astrom Class            ' . "\n";

    # Get status.
    my $errstr;

    # Highest stage index reached by any file of this pipeline.
    my $last_stage = 0;

    # One flag per stage: stays true until some file is found which has
    # not completed that stage.
    my @complete = (1) x @snames;

    # Messages for files that could not be examined.
    my @warn_list = ();

    foreach my $file (@$files) {
        # Filter out any catalogue files passed in by mistake.
        next if($file =~ /_cat\.(?:fit|fits|fts)(?:\[[^\]]+\])?$/);

        my($fileroot, $pstat) = get_status($file, \$errstr);
        unless(defined $fileroot) {
            my $msg = "$file: $errstr, skipped";
            warn($msg, "\n");
            push(@warn_list, $msg);

            # Actually skip the file, as the message promises; there is
            # no status to print or to fold into the stage summary.
            next;
        }

        # Row label: the file name without directory or FITS extension.
        # Fall back to the whole base name if the extension is not one
        # we recognise, rather than relying on whatever $1 last held.
        my $base = basename($fileroot);
        my $label = ($base =~ /^(.*?)\.(?:fit|fits|fts)$/ ? $1 : $base);

        # NOTE(review): the second flag is read from the key 'lim' although
        # its column is headed "Lin" -- confirm against get_status.
        # NOTE(review): for the three conditional columns this prints y/n
        # when the ndfringe/nrstanom/npercent flag is set and a blank
        # otherwise, whereas the POD describes "n" as "not required" --
        # confirm which is intended.
        printf OUTFH ("%-16.16s    %s   %s    %s     %s    %s    %s    %s     %s    " .
                      "%s    %s %s    %s     %s     %s      %s     %s    \n",
                      $label,
                      ($pstat->{stage1} ? "x" : " "),
                      ($pstat->{lim} ? "x" : " "),
                      ($pstat->{reset} ? "x" : " "),
                      ($pstat->{dark} ? "x" : " "),
                      ($pstat->{flat} ? "x" : " "),
                      ($pstat->{ndfringe} ? ($pstat->{defringe} ? "y" : "n") : " "),
                      ($pstat->{skysub} ? "x" : " "),
                      ($pstat->{tartup} ? "x" : " "),
                      ($pstat->{nrstanom} ? ($pstat->{rstanom} ? "y" : "n") : " "),
                      ($pstat->{roughwcs} ? "x" : " "),
                      ($pstat->{firstwcs} ? "x" : " "),
                      ($pstat->{imcore} ? "x" : " "),
                      ($pstat->{astrom} ? "x" : " "),
                      ($pstat->{classify} ? "x" : " "),
                      ($pstat->{npercent} ? ($pstat->{percent} ? "y" : "n") : " "),
                      ($pstat->{photom} ? "x" : " "));

        # Work out the furthest stage this file has reached, and knock
        # out the completeness flag of every stage it has not done.
        # Stages 1 and 9 are exempt when the matching ndfringe/npercent
        # flag is unset (presumably "stage not needed for this file";
        # the indices rely on the order returned by stage_names).
        my $stage = 0;

        for my $i (0 .. $#snames) {
            if($pstat->{$snames[$i]}) {
                $stage = $i;
            }
            else {
                $complete[$i] = 0 unless(($i == 1 && !$pstat->{ndfringe}) ||
                                         ($i == 9 && !$pstat->{npercent}));
            }
        }

        $last_stage = $stage if($stage > $last_stage);
    }

    if($htmlmode) {
        my $timestr = localtime();

        print OUTFH "</pre></font></p>\n";

        # Append the collected warnings, if any, as their own section.
        if(@warn_list) {
            print OUTFH "<p><h2>Warnings</h2></p>\n";
            print OUTFH "<p><font size=\"-1\"><pre>\n";

            foreach my $msg (@warn_list) {
                print OUTFH "$msg\n";
            }

            print OUTFH "</pre></font></p>\n";
        }

        print OUTFH <<EOD;
<hr>
<p>Last updated: $timestr</p>
</body></html>
EOD
    }

    close(OUTFH) || die "$progname: close: $!\n";

    # Classify each stage for the overall summary: anything beyond the
    # furthest stage reached is not started; the rest are complete only
    # if no file was found lacking them.
    my @summary = ();

    for my $i (0 .. $#snames) {
        $summary[$i] = ($i > $last_stage ? STAGE_NOTSTARTED :
                       ($complete[$i] ? STAGE_COMPLETED : STAGE_INCOMPLETE));
    }

    if($htmlmode) {
        # Data-set cell links to the detailed file written above.
        printf SUMFH ("<tr><td><a href=\"%s\">%s</a></td>",
                      $outfn, $param->{name});

        # Track whether every stage is untouched / every stage is done,
        # to fill in the "Overall" column.
        my $allnone = 1;
        my $alldone = 1;

        for my $i (0 .. $#snames) {
            if($summary[$i] == STAGE_NOTSTARTED) {
                print SUMFH "<td class=\"notstart\">Not started</td>";

                $alldone = 0;
            }
            elsif($summary[$i] == STAGE_INCOMPLETE) {
                print SUMFH "<td class=\"incomplete\">Incomplete</td>";

                $allnone = 0;
                $alldone = 0;
            }
            elsif($summary[$i] == STAGE_COMPLETED) {
                print SUMFH "<td class=\"complete\">Complete</td>";

                $allnone = 0;
            }
        }

        if($allnone) {
            print SUMFH "<td class=\"notstart\">Not started</td>";
        }
        elsif($alldone) {
            print SUMFH "<td class=\"complete\">Complete</td>";
        }
        else {
            print SUMFH "<td class=\"incomplete\">Incomplete</td>";
        }

        print SUMFH "</tr>\n";
    }
    else {
        # List mode: report the last stage of the unbroken run of
        # completed stages, counted from the first.
        my $lastdone = $#snames;

        for my $i (0 .. $#snames) {
            if($summary[$i] != STAGE_COMPLETED) {
                $lastdone = $i - 1;
                last;
            }
        }

        # If not even the first stage is complete $lastdone is -1; a
        # negative index would silently name the *last* stage, so print
        # a placeholder instead.
        my $lastname = ($lastdone >= 0 ? $snames[$lastdone] : 'none');

        printf SUMFH ("%s %s %s %.2d/%.2d %s\n", $param->{name}, $dir, $param->{instrument},
                      $lastdone, $#snames, $lastname);
    }
}

# In HTML mode, close the summary table and stamp the page with the time
# at which it was generated.
if($htmlmode) {
    my $now = localtime();

    print SUMFH "</table></p>\n" .
                "<hr>\n" .
                "<p>Last updated: $now</p>\n" .
                "</body></html>\n";
}

# A failed close on the summary file is fatal.
close(SUMFH)
    or die "$progname: close: $!\n";

exit(0);

### Functions ###

# basename PATH
#
# Returns the final component of PATH, i.e. the run of non-slash
# characters at its very end.  If PATH has no such run (it is empty or
# ends in a slash) PATH itself is returned unchanged.
sub basename {
    my $path = shift;

    # List assignment captures the trailing component, or leaves $leaf
    # undefined when the pattern does not match.
    my($leaf) = $path =~ m|([^/]+)$|;

    return(defined $leaf ? $leaf : $path);
}

# Docs
=pod

=head1 NAME

cirdr_status - produce a summary of pipeline status

=head1 SYNOPSIS

B<cirdr_status> [B<-l> I<listfile>] [B<-n>] [I<pipeline>] [...] I<outputdir>

=head1 DESCRIPTION

B<cirdr_status> determines the current processing status of a CIRDR
data reduction pipeline for WFCAM data, from the FITS headers in the
image and catalogue files produced by the pipeline.  A summary of the
current processing status, and details for all files being processed
by each named pipeline are produced.

The default is to create the output in HTML format.  A summary table
is written to the file F<summary.html> in the directory F<outputdir>
(which will be created if necessary), containing links to individual
HTML summaries for each pipeline, written in files named
F<{pipeline}.html> where I<{pipeline}> is the name of the pipeline.
An alternative ASCII output mode can also be used (see OPTIONS,
below).

The pipeline names, supplied via the parameter I<pipeline>, are mapped
to the pipeline locations through the file F<listfile>.  The
format of this file is described in FILES, below.  If no pipeline
names are supplied, all pipelines in the list file are processed.

Under each location from the list, the script performs a recursive
search through this directory, and all subdirectories, for files named
with F<.fit>, F<.fits> and F<.fts> extensions.  Only target files are
considered by filtering out any catalogues and calibration frames.
The search code will be revised when a final directory structure for
pipeline processing is fixed.

The FITS headers of each target frame are interrogated to ascertain
the current status of processing.  Catalogue files are opened also if
they exist, and are searched for under the standard names used by the
CIRDR software.  A single-line summary of the stages of processing
completed (see STAGES, below) is printed to the detailed summary file
for each target frame in a simple ASCII format, described under
SUMMARY FORMAT.  Any warnings generated during this process are
written to a section named I<Warnings> at the end of the HTML output,
or printed to the standard error stream if the output is ASCII.

A line is written to the overall summary for each pipeline, detailing
the stages of processing which the script deems to be complete.

=head1 OPTIONS

The following command-line options are supported:

=over

=item B<-l> I<listfile>

Uses F<listfile> as the pipeline list file instead of the default of
F<pipelines.list> in the current directory.

=item B<-n>

Disables HTML output.  The summary file is now written in ASCII
format as F<summary.list>, with the summary for pipeline
I<{pipeline}> written to F<{pipeline}.list>.  The use of this option
is not supported, and it may be removed in the future.

=back

=head1 SUMMARY FORMAT

The detailed summary consists of an ASCII table containing a column
for each stage of processing.  Some stages of the processing are only
required by a subset of data-sets processed.  These are handled
slightly differently.

For stages which must be passed by all data-sets, the column contains
a blank (space) character if the stage has not been completed, and an
C<x> character if the stage has been completed.  For stages which are
not required for all data, a C<n> character indicates that the stage
is not required.  Otherwise, it is required and a blank and C<y>
character signify that the stage is not complete, or is complete,
respectively.

The following columns are present for WFCAM data, in approximate order
of processing within the pipeline software:

=over

=item B<Stage 1>

=over

=item I<Done>

Stage 1 processing has been run, and is considered to be complete by
the script.  This should mean that all four of the stages below have
been completed.

=item I<Lin>

The image has been linearity corrected.

=item I<Reset>

The image has had reset correction (subtraction of a bias image)
performed.

=item I<Dark>

Dark correction has been performed: a dark frame has been subtracted.

=item I<Flat>

Flatfield correction has been performed: the image has been divided by
a flatfield frame.

=back

=item B<Def>

Defringing should be (if C< >) / has been (if C<y>) performed on the
image.

=item B<Sky>

Sky subtraction has been performed.

=item B<Tartup>

=over

=item I<Done>

The processing in F<cir_tartup.c> has been run, and is considered to
be complete by the script.  This should mean that all of the stages
below have been completed.

=item I<Reset>

Correction of reset anomalies has been performed.

=back

=item B<WCS>

=over

=item I<0>

An initial rough world co-ordinate system (WCS) has been written to
the FITS headers.  This is derived directly from the telescope
pointing information.

=item I<1>

A first-pass world co-ordinate system has been written.  This is
derived from the image only using an APM-like algorithm to extract
sources and refine the coordinates against an astrometric catalogue.

=back

=item B<Catalogues>

=over

=item I<Done>

The source catalogue for this frame has been produced.  This means
that I<imcore> has been run and a catalogue file exists.

=item I<Astrom>

A second-pass world co-ordinate system has been written.  This is
derived by refining the WCS against an astrometric catalogue using the
result of the source extraction in I<imcore>.

=item I<Class>

The object morphological classifier has been run.

=back

=item B<Sky%>

Sky percentage corrections have been computed and written to the FITS
headers.

=item B<Phot>

The photometry stage of the pipeline has been run.  Here the
photometric calibration of the catalogues is performed.

=back

The short summary format used in the F<summary.html> file contains one
row per defined pipeline, with one column for each of the stages
listed below.  Each column reads I<Complete>, I<Incomplete> or
I<Not started> if the script deems the relevant processing to have
finished, to have been started but not finished, or not to have been
started, respectively.  An overall column at the right summarises
processing for this pipeline.

The following stages are listed (see above for descriptions):

=over

=item * I<Stage1>

=item * I<Defringe>

=item * I<Skysub>

=item * I<Roughwcs>

=item * I<Tartup>

=item * I<Firstwcs>

=item * I<Imcore>

=item * I<Astrom>

=item * I<Classify>

=item * I<Percent>

=item * I<Photom>

=back

=head1 EXAMPLES

Process pipelines C<example1> and C<example2>, producing HTML output
in F<outdir>.

  cirdr_status example1 example2 outdir

Process all pipelines listed in the alternative list file F</dir/list>
placing the output in F<outdir>.

  cirdr_status -l /dir/list outdir

=head1 FILES

=over

=item F<pipelines.list>

A list of pipeline names, with locations and the instrument used to
generate the data.  The file format is based on the F<.ini> format, in
particular it resembles that used in smb.conf(5) from the Samba
package.

The character C<#> introduces a comment, and instructs the parser to
ignore the rest of the line.  The remainder of the file contains
sections, one describing each pipeline, each introduced by
C<[{pipeline}]> on a line by itself for pipeline I<{pipeline}>.  The
following lines, up to the next section delimiter, are keyword-value
pairs, with the key and value separated by an C<=> character with
optional surrounding whitespace, i.e. C<{key}={value}>.  The following
keys are supported:

=over

=item I<path>

Specifies the full path to the pipeline.  This must be given for each
pipeline in the file.

=item I<instrument>

Specifies the instrument used to take the data.  This currently does
nothing, and should be specified as C<wfcam>.  A future version of the
script may support multiple instruments.

=back

=back

=head1 NOTES

This script is very specific to the draft WFCAM FITS headers and
pipeline.

Since there is no file-locking by the pipeline we have to guess that
the file is being written if CFITSIO complains.  The code is likely to
give unreliable results for the particular file being written,
especially if, for example, extra FITS headers were appended which
caused the HDU to need an extra FITS block assigning.

The recursive searching utilised here will give unreliable results in
certain pipeline directory structures.  When a standard directory
hierarchy for pipeline processing is decided, this script will be
modified to use it instead.

=head1 SEE ALSO

CIRDR pipeline documentation
http://www.ast.cam.ac.uk/~jrl/cirdr/index.html

smb.conf(5)

=head1 AUTHOR

Jonathan Irwin (jmi@ast.cam.ac.uk)

=cut

