#/usr/local/bin/perl
#In article <1990Mar14.231020.5784@smurf.sub.org>, urlichs@smurf (Matthias Urlichs) writes:
#| Another improvement is to open two NNTP channels to your favorite server. On
#| one, you do your NEWNEWS, and the other is used to fetch articles as soon as
#| their IDs come in over the first channel.
#| This is necessary on some low-speed Internet links like ours (which frequently
#| makes nntpd time out, drops connections, and other fun stuff) and basically
#| enabled us to get 24 hours of Usenet traffic in 14 hours instead of 30.
#| 
#| I'd like to convert this to a somewhat better C programming style before
#| letting the rest of the world see it, though...
#
#I've been using a Perl program that I call 'nntptap' that does exactly
#*that* for the last few months.  (In fact, my only feeds have been
#exclusively through this program.)
#
#nntptap opens NNTP servers on both sides (two on the sender, and one
#on the receiver), so it could be used in place of nntpxmit as well.
#
#If asked, it maintains a timestamp file of your choosing, and will
#record the beginning of the most recent successful transfer as the
#mtime of that file.  On subsequent transfers, the NEWNEWS command is
#generated accordingly.  If you don't use a timestamp, the default
#period is '42 days', which appears to be plenty big to get all the
#news on the source server that you don't have already. :-)
#
#Here's what it looks like in my crontab for news:
#
#42 * * * * nice /usr/lib/newsbin/nntptap2 -v1 -finews -s/usr/lib/news/stamp.inews >>/usr/lib/news/log.inews 2>&1
#52 * * * * nice /usr/lib/newsbin/nntptap2 -v1 -fomepd -s/usr/lib/news/stamp.omepd >>/usr/lib/news/log.omepd 2>&1
#12 0,2,4,6,8,10,12,14,16,18,20,22 * * * nice /usr/lib/newsbin/nntptap2 -v1 -f129.189.192.20 -s/usr/lib/news/stamp.orc >>/usr/lib/news/log.orc 2>&1
#12 1,3,5,7,9,11,13,15,17,19,21,23 * * * nice /usr/lib/newsbin/nntptap2 -v1 -f129.95.40.2 -s/usr/lib/news/stamp.ogicse >>/usr/lib/news/log.ogicse 2>&1
#
#It currently doesn't test distributions, just newsgroups, so it is
#prone to distribution leaks.  In practice, I have not found this to be
#a problem, so I am not solving it (yet!).
#
#I had problems with the server timing out, so I have a watchdog
#process that kills the transfer if it starts taking too long.  This
#has worked so far, but if you have a better solution, lemme know.
#
#If you start using this, lemme know, and I'll throw you on a mailing
#list for any update announcements.
#
################################################## snip here

$ZERO = $0;

sub usage {
	die join("\n",@_) .
	"\nusage: $ZERO [-f fromhost] [-t tohost] [-s stampfile] [-v verboselevel] [-g groups] [-w watchdogseconds]\n";
}

do 'getopt.pl' || die "Cannot do getopts.pl ($!)";

&Getopt('ftsvgw');

&usage("extra arguments: @ARGV") if $#ARGV > -1;

chop($thishost = `hostname`);
## defaults:
$fromhost = defined $opt_f ? $opt_f : $thishost;
$tohost = defined $opt_t ? $opt_t : $thishost;
$stampfile = $opt_s; # null string means no stamp
$verbose = defined $opt_v ? $opt_v : 0;
#$sub = defined $opt_g ? $opt_g : "comp,news,sci,rec,misc,soc,talk,to,alt,gnu,intel,pnw,or,pdx";
$sub = defined $opt_g ? $opt_g : "alt,control,houston,sci,usrgroup,austin,ieee,misc,slac,vmsnet,bionet,news,soc,bit,dfw,info,talk,biz,general,tamu,comp,gnu,k12,rec,tx,to.feenix";
$watchdogseconds = defined $opt_w ? $opt_w : 4*60*60; # 4 hour default

## verbose codes:
## 0 = only summary
## 1 = single letter progress
## 2 = noisy progress
## 3 = handshaking too

&usage("fromhost = tohost?") if $fromhost eq $tohost;

$sockaddr = 'S n a4 x8';
@x = getprotobyname('tcp'); $proto = $x[2];
@x = getservbyname('nntp','tcp'); $port = $x[2];

sub hosttoaddr {
	local($hostname) = @_;
	local(@x);
	if ($hostname =~ /^(\d+)+\.(\d+)\.(\d+)\.(\d+)$/) {
		pack('C4', $1, $2, $3, $4);
	} else {
		@x = gethostbyname($hostname);
		die "gethostbyname: $hostname ($!)" if $#x < 0;
		$x[4];
	}
}

$toaddr = &hosttoaddr($tohost);
$fromaddr = &hosttoaddr($fromhost);
$thisaddr = &hosttoaddr($thishost);

$thisproc = pack($sockaddr, 2, 0, $thisaddr);
$tonntp = pack($sockaddr, 2, $port, $toaddr);
$fromnntp = pack($sockaddr, 2, $port, $fromaddr);

$| = 1;

$mtime = ($stampfile && (@x = stat($stampfile)) && $x[9]) || time-86400*42;
@x = gmtime($mtime-3600); # one hour overlap
$timestamp = sprintf("%02d%02d%02d %02d%02d%02d GMT",
		$x[5],$x[4]+1,$x[3],$x[2],$x[1],$x[0]);

sub setup {
	local($FH) = shift;
	local($fromorto) = shift ? $fromnntp : $tonntp;
	local($oldfh);
	socket($FH, 2, 1, $proto) || die "$FH socket: $!";
	bind($FH, $thisproc) || die "$FH bind: $!";
	connect($FH, $fromorto) || die "$FH connect: $!";
	$oldfh = select($FH); $| = 1; select($oldfh);
	(($_ = &get($FH)) =~ /^2/) || die "got $_ during greeting $FH";
	&put($FH,"SLAVE");
	(($_ = &get($FH)) =~ /^2/) || die "got $_ during slave $FH";
}

sub put {
	local($FH) = shift;
	local($what) = shift;
	print $FH "$what\n";
	print "$FH >>> $what\n" if $verbose >= 3;
	$what;
}

sub get {
	local($FH) = shift;
	local($what);
	$what = <$FH>;
	$what =~ s/\015//;
	$what =~ s/\n//;
	print "$FH >> $what\n" if $verbose >= 3;
	$what;
}

$starttime = time;
@x = localtime($starttime);
printf "%s: begin %02d/%02d/%02d %02d:%02d:%02d\n",
	$fromhost,$x[5],$x[4]+1,$x[3],$x[2],$x[1],$x[0];

## launching the watchdog:

unless (fork) {
	$target = $starttime + $watchdogseconds;
	while (time < $target) {
		sleep 120;
		exit 0 if ($ppid = getppid) == 1; # orphaned
	}
	kill 15, $ppid;
	sleep 10;
	kill 9, $ppid;
	exit 1;
}

&setup("FI",1); # 'F'rom 'I'ndex: send NEWNEWS, use reply as worklist
&setup("FD",1); # 'F'rom 'D'ata: send ARTICLE to fetch article
&setup("TD",0); # 'T'o 'D'ata: send IHAVE to see if wanted, and to store it

## basic algorithm: start a NEWNEWS going.  As it spits out each article ID,
## send that down as an IHAVE to the receiver (we lie, because we don't
## *really* have it... yet).  If the receiver wants it (doesn't say "Got it"),
## send ARTICLE on the other input channel to get the text, sending it line
## at a time to receiver.  If the sender balks on the ARTICLE (expired or
## cancelled), send an empty article to the receiver (which it mostly
## ignores).  Repeat this a zillion times.

($groups = $sub) =~ s/([^,]+)/\1.*/g;
$groups .= ",control";
&put("FI","NEWNEWS $groups $timestamp");
(($_ = &get("FI")) =~ /^2/) || die "got $_ during newnews FI";

MAIN: {
	$_ = &get("FI");
	last MAIN if /^\./;
	$art = $_;
	$arts++;
	&put("TD", "IHAVE $art");
	$_ = &get("TD");
	unless (/^3/) {
		$rejects++;
		print "$fromhost: rejecting $art: $_\n" if $verbose >= 2;
		print "r" if $verbose == 1;
		redo MAIN;
	}
	&put("FD", "ARTICLE $art");
	$_ = &get("FD");
	unless (/^2/) {
		# they didn't have what they said they had (expired/cancelled)
		&put("TD","."); # terminate the article, sorry!
		$aborts++;
		print "$fromhost: aborting $art: $_\n" if $verbose >= 2;
		print "a" if $verbose == 1;
		&get("TD"); # ignore return
		redo MAIN;
	}
	print "$fromhost: transferring $art\n" if $verbose >= 2;
	print "t" if $verbose == 1;
	INNER: {
		$_ = &get("FD");
		last INNER if /^\.$/;
		# an initial period is doubled, but that's the way we want it
		&put("TD",$_);
		redo INNER;
	}
	&put("TD",".");
	$_ = &get("TD");
	unless(/^2/) {
		$errors++;
		print "$fromhost: error at $art: $_" if $verbose >= 2;
		print "e" if $verbose == 1;
	}
	redo MAIN;
}

print "\n" if $verbose == 1;
printf "%s: stats %d offered %d accepted %d rejected %d aborted %d failed\n",
	$fromhost, $arts, $arts-$rejects-$aborts-$errors, $rejects,
	$aborts, $errs;
@x = times;
printf "%s: times user %.1f system %.1f elapsed %.1f\n",
	$fromhost, $x[0], $x[1], time-$starttime;
exit 2 if $errs;
if ($stampfile) {
	unless (-e $stampfile) {
		open(S,">$stampfile") || die "Cannot create $stampfile ($!)";
		close(S);
	}
	utime $starttime, $starttime, $stampfile ||
		die "Cannot utime $stampfile ($!)";
}
exit 0;