#!/usr/bin/perl -w
#
# cvslog: merge cvs logs of multiple files into one
#
# Copyright (C) 2001 Tony Finch <dot@dotat.at>
#
# $dotat: scripts/cvslog,v 1.38 2002/08/12 10:26:49 fanf2 Exp $
#
# Permission is granted to use, modify, and distribute this software and its
# documentation, provided that the copyright and version information, this
# permission notice, and the following disclaimer appear in all copies, and
# that modified copies are clearly marked as such and are not endorsed or
# promoted using the name of the author without written permission.
#
# This software and its documentation are provided "as is", without warranty
# of any kind: in no event shall the author be held liable for any damages
# arising from the use of this software or its documentation.


use strict;

use Getopt::Long;
use IO::Pipe;

sub log_entry ($$$$$$);
sub splitrev ($);
sub isbranch ($);
sub tabulate (@);
sub unixtime ($);
sub debug ($);


#### data structures etc.

# A particular version of a particular file is called a "filerev"
# which is the concatenation of the RCS filename and the version
# number with a hyphen between them.

# We keep lists of filerevs as the keys of a hash like this:
#
# $revlist = { $filerev => $lines, ... };

# A reference to the following structure represents a change set.
#
# $change = {
#	revs    => $revlist,
#	time    => $time,
#	date    => $date,
#	author  => $author,
#	message => $message,
# };

# This is a list of all the changes.
#
my @changes;

# We need to be able to look up the change for a given filerev.
# This hash may contain more than one reference to the same change.
#
my %revchange;

# We keep an index of changes keyed on the
# commit message and author in order to combine
# multi-file commits into one entry.
#
# $similar{"$author\n$message"} = [ $change, ... ];
#
my %similar;

# A single commit might not be completed within
# one second, so we allow a fudge factor when
# comparing the time stamps of commits that we
# think might be the same. The fudge factor is
# larger than you might expect because when doing
# a single commit that spans multiple directories
# to a repository on the local machine, cvs gives
# you the opportunity to re-edit the commit
# message for each directory and commits the
# relevant files immediately after each edit.
#
my $time_fudge = 30;

# Fake changes that represent the start and end of all
# time. Note that perl produces an unsigned result for
# the ~ operator so it will always be bigger than the
# maximum time_t which is signed.
#
my $alpha = { time => -~0, date => "?" };
my $omega = { time =>  ~0, date => "?" };

# A reference to the following structure represents
# a tag or a branch. The before and after entries
# refer to the last commit before and the first
# commit after the tag or branch was created. This
# may be incorrect if tags of the same name have
# been laid down on different files at different
# times. What is worse, the same name may be used
# for a tag in one file and a branch in another!
#
# $tag = {
#	revs   => $revlist,
#	before => $change1,
#	after  => $change2,
# these two are only really used for debugging
#	name   => $tagname,
#	type   => "tag" or "branch",
# };

# Tags with the same name in different files are
# combined using this data structure.
#
# $tag{$tagname} = $tag;
#
my %tag;

# We keep a list of the tags that apply to each filerev.
# We enter branches here twice, once for the revision
# just before the branch was created (for working out
# when that happened), and once for the branch number
# (which may be distinguished from a normal revision
# number because it has an even number of dots).
#
# $revtags{$filerev} = [ $tagname, ... ];
#
my %revtags;


# regexps etc. for parsing the cvs log output
#
my $file_re  = '^RCS file: (.*)$';
my $tag_re   = '^\s+([^:]+):\s+([0-9.]+)$';
my $head_end = "description:\n";
#
my $lineN    = "=" x 77 . "\n";
my $line0    = "-" x 28 . "\n";
my $line1_re = '^revision\s+([0-9.]+)$';
my $line2_re = '^date:\s+([^;]*);\s+author:\s+([^;]*);\s+'
             . 'state:\s+([^;]*);(\s+lines:)?(.*)$';
my $line3_re = '^branches:(\s+[0-9.]+;)+$';


#### parse options

my %option;
Getopt::Long::Configure 'bundling';
GetOptions (\%option, "debug|d!", "noless|L!", "brieftags|n!",
	    "notags|N!", "pipe|p!")
    or die "usage: cvslog [-d] [-L] [-n] [-N] [-p] [file ...]\n",
           "\t-d\tdebugging output\n",
	   "\t-L\tdon't run pager\n",
           "\t-n\tshow less tag information\n",
           "\t-N\tdo not show any tags\n",
           "\t-p\tread input from pipe\n";

#### work out where to get the cvs log input from

my $inpipe;
if ($option{pipe}) {
	$inpipe = new_from_fd IO::Handle fileno(STDIN), "r";
	die "$0: fdopen: $!\n" unless defined $inpipe;
} else {
	$inpipe = new IO::Pipe;
	die "$0: pipe: $!\n" unless defined $inpipe;
	$inpipe->reader("cvs", "log", @ARGV);
}

#### work out where to send output to

my $outpipe;
if ($option{noless} or not -t STDOUT) {
	$outpipe = new_from_fd IO::Handle fileno(STDOUT), "w";
	die "$0: fdopen: $!\n" unless defined $outpipe;
} else {
	$outpipe = new IO::Pipe;
	die "$0: pipe: $!\n" unless defined $outpipe;
	$outpipe->writer($ENV{PAGER});
}

#### slurp in the logs

for (;;) {
	my $file;
	my $line;

	# read tag metadata etc. and find start of revision messages
	while($line = $inpipe->getline) {
		if ($line =~ /$file_re/o) {
			$file = $1;
			debug "RCS file $file";
			next;
		}
		if ($line =~ /$tag_re/o and not $option{notags}) {
			$tag{$1}->{name} = $1;
			my $tag = $tag{$1};
			my $filerev = "$file-$2";
			# fix branch revision numbers
			$filerev =~ s/\.0(\.\d+)$/$1/;
			$tag->{revs}->{$filerev} = "";
			push @{$revtags{$filerev}}, $tag->{name};
			$tag->{before} ||= $alpha;
			$tag->{after} ||= $omega;
			if (isbranch $filerev) {
				$tag->{type} = "branch";
				my $startrev = splitrev $filerev;
				push @{$revtags{$startrev}}, $tag->{name};
			} else {
				$tag->{type} = "tag";
			}
			debug "$tag->{type} $tag->{name} rev $filerev";
			next;
		}
		if ($line eq $head_end) {
			debug "end of header";
			last;
		}
	}
	last unless defined $line;

	# We have to be sure that we have all three lines of the header
	# for a revision entry before we stop accumulating the previous
	# commit's message. We process the log line by line, and keep
	# what we expect the next line to be in the state variable.

	my $state = "message";
	my $next_rev;
	my $rev;
	my $date;
	my $author;
	my $lines;
	my $message;

	while($line = $inpipe->getline) {
		if ($state eq "line1") {
			if ($line =~ /$line1_re/o) {
				debug "line1 $line";
				$next_rev = $1;
				$state = "line2";
				next;
			}
			$message .= $line0;
		}
		if ($state eq "line2") {
			if ($line =~ /$line2_re/o) {
				debug "line2 $line";
				# we now have the header for the next change
				# so save the previous change if there was one
				log_entry $file, $rev, $date, $author, $lines, $message
					if defined $rev;
				# and remember the header info of this change
				$rev = $next_rev;
				$date = $1;
				$author = $2;
				$lines = $5;
				$message = "";
				$state = "line3";
				next;
			}
			$message .= $line0 . "revision $next_rev\n";
		}
		if ($state eq "line3") {
			if ($line =~ /$line3_re/o) {
				debug "line3 $line";
				# ignore this optional header line
				$state = "message";
				next;
			}
		}
		$state = "message";
		if ($line eq $lineN) {
			debug "end of file";
			last;
		}
		if ($line eq $line0) {
			debug "start of entry";
			$state = "line1";
			next;
		}
		$message .= $line;
	}
	log_entry $file, $rev, $date, $author, $lines, $message
		if defined $rev;
}

#### cvs has finished

$inpipe->close;

#### create fake change entries for the tags

unless ($option{brieftags}) {
	for my $tag (values %tag) {
		my $date;
		# skip tags for which we didn't see the actual revisions
		next unless $tag->{before}->{time} > $alpha->{time};
		$date = "$tag->{before}->{date} ... $tag->{after}->{date}";
		push @changes, {
			revs    => $tag->{revs},
			time    => $tag->{before}->{time},
			date    => $date,
			author  => "",
			message => "created $tag->{type} $tag->{name}\n"
		};
	}
}

#### sort and print the changes

for my $change (sort {$a->{time} <=> $b->{time}} @changes) {
	$outpipe->print("$change->{date} $change->{author}\n\n");
	for my $filerev (sort {$a cmp $b} keys %{$change->{revs}}) {
		my $lines = $change->{revs}->{$filerev};
		$filerev =~ /^(.*)-([0-9.]*)$/;
		my ($file,$rev) = ($1,$2);
		$outpipe->print("  $file\n");
		my @tags;
		my @bps;
		for my $tagname (sort @{$revtags{$filerev}}) {
			if (exists $tag{$tagname}->{revs}->{$filerev}) {
				push @tags, $tagname;
			} else {
				push @bps, $tagname;
			}
		}
		my $branchrev = splitrev $filerev;
		my @branches = sort @{$revtags{$branchrev} or []};
		unshift @tags, "  tags:" if @tags;
		unshift @bps, "  branch points:" if @bps;
		unshift @branches, "  branches:" if @branches;
		$outpipe->print("    $rev$lines@bps@branches@tags\n");
	}
	$outpipe->print("\n", $change->{message});
	$outpipe->print("-" x 32, "\n");
}

#### that's all, folks

$outpipe->close;
exit;

#### add a log entry

sub log_entry ($$$$$$) {
	my $file    = shift;
	my $rev     = shift;
	my $date    = shift;
	my $author  = shift;
	my $lines   = shift;
	my $message = shift;

	my $filerev = "$file-$rev";
	my $time = unixtime $date;

	debug "log entry $file $rev $date $author $lines";

	my $change;

	# if there's a similar change within the time fudge
	# period, merge this change with it.
	#
	my $key = "$author\n$message";
	for my $similar (@{$similar{$key}}) {
		if ($time_fudge > abs($time - $similar->{time})) {
			$change = $similar;
			last;
		}
	}
	# otherwise this is a separate change
	#
	unless (defined $change) {
		$change = {
			time    => $time,
			date    => $date,
			author  => $author,
			message => $message,
		};
		push @changes, $change;
		push @{$similar{$key}}, $change;
	}
	$change->{revs}->{$filerev} = $lines;
	$revchange{$filerev} = $change;

	# log entries turn up in reverse chronological
	# order by branch, which allows us to take a
	# short-cut with the book keeping: if this
	# revision is tagged then we can update the
	# tag's before and after timestamps based on
	# this revision and its successor which we
	# know we have already seen if there is one.
	#
	# We try to avoid bogus time stamps by ensuring
	# that they are in the interval that we have
	# already established, but this has the problem
	# that if the first file we come across is bogus
	# then we will never get near the truth.
	#
	my ($nextrev,$x) = splitrev $filerev;
	$nextrev = $nextrev.".".($x+1);
	my $after = $revchange{$nextrev} || $omega;
	for my $tagname (@{$revtags{$filerev}}) {
		my $tag = $tag{$tagname};
		if ($tag->{before}->{time} < $time and
		    $time < $tag->{after}->{time}) {
			$tag->{before} = $change;
			debug "tag $tag->{name} after $date";
		}
		if ($tag->{before}->{time} < $after->{time} and
		    $after->{time} < $tag->{after}->{time}) {
			$tag->{after} = $after;
			debug "tag $tag->{name} before $after->{date}";
		}
	}
}

#### revision number utils

sub splitrev ($) {
	my $rev = shift;
	$rev =~ s/\.(\d+)$//;
	if (wantarray) {
		return ($rev,$1);
	} else {
		return $rev;
	}
}

sub isbranch ($) {
	my $rev = shift;
	$rev =~ s/^.*-([0-9.]+)$/$1/;
	$rev =~ s/[^.]//g;
	return (1 & length $rev) ? 0 : 1;
}

#### convert human-readable time to unix time

sub unixtime ($) {
	my $string = shift;

	$string =~ /(\d\d\d\d).(\d\d).(\d\d).(\d\d).(\d\d).(\d\d)/;
	my $year   = $1;
	my $month  = $2;
	my $day    = $3;
	my $hour   = $4;
	my $minute = $5;
	my $second = $6;

	use integer;

	$year -= $month < 3;
	$month += $month < 3 ? 10 : -2;
	my $time = $year/400 - $year/100 + $year/4 + $year*365
	         + $month*367/12 + $day - 719499;
	$time = $time * 86400
	      + $hour * 3600
	      + $minute * 60
	      + $second;

	return $time;
}

####

sub debug ($) {
	my $message = shift;
	print STDERR "$message\n"
		if $option{debug};
}

#### EOF