#!/usr/bin/perl
# $Id: $
# Written by Adrian Mariano, additional features by Eric Backus and
# Jeff Conrad.

# Script to translate a texinfo file into an nroff/troff manual page.
# last revision: 21 October 2024 Jeff Conrad

$thisprog = $0;
$thisprog =~ s/.*[\/\\]//;
$version="1.2.10";

$html=0;
$info=0;
$example=0;
$ignore=0;
$tex=0;
$doman=0;
$in_comment=0;		# within lines to be commented out for man source
$title=0;
$diditem=0;
$justdidparagraph=1;		# indicates paragraph tag was output
$noman=0;		# indicates material to be skipped for man source
$manprefix="";		# holds n/troff comment string .\"
$args=($#ARGV < 0) ? "stdin" : "@ARGV";

$SH_delim = ".\\\" ====================================================================";
$SS_delim = ".\\\" --------------------------------------------------------------------";

printf(".\\\" Do not edit this file.  It was created from %s\n", $args);
printf(".\\\" using %s version %s.\n", $thisprog, $version);

while(<>)
{
    # check for unbalanced {} in @-commands and math
    if (! $noman && $_ =~ /{/) {
      while ($_ =~ /[^\\]{/g) { $opening++; }
      while ($_ =~ /}/g) { $closing++; }
      if ($closing != $opening) {
	printf(STDERR "unbalanced {}, line %d: %s", $., $_);
      }
      $opening = $closing = 0;
    }
    # use font CW in tables
    if (/\@c man\s+l\s/)
    {
	s/\@c man //;
	s/l/lfCWp-1/;
	print;
	next;
    }
    if (/\@c\s+man\s+program/) { 
	chop;
	s/\@c\s+man\s+program\s+//;
	$program = $_;
	next;
    }

    if (s/\@c man //)
    {
	print;
	if (/AUTHOR/) {
	    $errors = 0;
	    if (! $author) {
		printf(STDERR "%s: missing '\@author'\n", $thisprog);
		$errors++;
	    }
	    if (! $program) {
		printf(STDERR "%s: missing '\@c man program'\n", $thisprog);
		$errors++;
	    }
	    if ($errors) { exit; }
	    else { printf(".I %s\nwas written by %s\n", $program, $author); }
	}
	if (/\.TH/) { add_extensions(); }
	next;
    }

    if (/\@c noman/) { $noman=1; next; }
    if (/\@c end noman/) { $noman=0; next; }
    if ($noman) { next; }

    if (/^\@c ifman \.\\\"/) {
	$in_comment = 1;
    }
    if ($in_comment && /^\@c end ifman/) {
	$in_comment = 0;
    }
    # $manprefix holds comment string .\"
    if (/\@c ifman\s*(.*)/) { $doman=1; $manprefix = $1; next; }
    if (/\@c end ifman/) { $doman=0; $manprefix = ""; next; }
    if (/^\@c [^m]/) { next; }

    if (/^\\input/) { next; }
    if (/^\*/) { next; }
    if (/^START-INFO-DIR-ENTRY/) { next; }
    if (/^END-INFO-DIR-ENTRY/) { next; }

    if (/\@author/) {
	chop;
	s/\@author\s+//;
	$author = $_;
	next;
    }
    if (/\@titlepage/) { $title=1; next; }
    if (/\@end titlepage/) { $title=0; next; }
    if (/\@tex[[:space:]]*$/) { $tex=1; next; }
    if (/\@end tex/) { $tex=0; next; }
    if (/\@ignore/) { $ignore=1; next; }
    if (/\@end ignore/) { $ignore=0; next; }
    if (/\@ifinfo/) { $info=1; next; }
    if (/\@end ifinfo/) { $info=0; next; }
    if (/\@ifhtml/) { $html=1; next; }
    if (/\@end ifhtml/) { $html=0; next; }
    if (/\@html/) { $html=1; next; }
    if (/\@end html/) { $html=0; next; }
    if (!$doman && ($ignore || $html || $info || $title || $tex)) { next; }

    s/\@\*$/\n\.br/g;
    s/^\@\*/.br/g;
    s/\@\*$/\n.br/g;
    s/\@ / /g;
    s/\@dmn\{([^}]*)}/\\|$1/g;
    s/\@tie\{}/\@no_break_space\{}/g;
    s/\@w\{}/\@no_break_space\{}/g;
    s/\@backslashchar\{}/\\e/g;

    if (/(\@math\{)/) {
      # TODO: make this more robust? (jpc 2023-06-07)
      s/(\@math\{[^}]*)-([^}]*})/$1\\-$2/g;
      s/\^\\circ/\\(de/g;
      s/\^\\prime/\\(fm/g;
      # only works with eqn(1)
      #s/(\@math\{[^}]*)\\times *10\^([^}]*})/$1 times 10 sup $2/g;
      s/(\@math\{[^}]*)''([^}]*})/$1\\(sd$2/g;
      s/(\@math\{[^}]*)'([^}]*})/$1\\(fm$2/g;
      s/\\pm/\\(+-/g;
      s/\\mp/\\(-+/g;
      s/\@math\{([^}]*)}/\@no_decoration\{$1}/g;
    }

    # superscripts
    s/\@sup\{([^}]+)}([[:punct:]])*$/\\c\n.if n ^$1$2\n.if t \\v'-.4m'\\s-3$1\\s0\\v'.4m'$2/;
    s/\@sup\{([^}]+)}([[:punct:]])[[:space:]]+/\\c\n.if n ^$1$2\n.if t \\v'-.4m'\\s-3$1\\s0\\v'.4m'$2\n/g;
    s/\@sup\{([^}]+)}[[:space:]]+/\\c\n.if n ^$1\n.if t \\v'-.4m'\\s-3$1\\s0\\v'.4m'\n/g;
    s/\@sup\{([^}]+)}/\\c\n.if n ^$1\\c\n.if t \\v'-.4m'\\s-3$1\\s0\\v'.4m'\\c\n\\&/g;

    # subscripts
    s/\@sub\{([^}]+)}([[:punct:]])*$/\\c\n.if n $1$2\n.if t \\v'.3m'\\s-3$1\\s0\\v'-.3m'$2/;
    s/\@sub\{([^}]+)}([[:punct:]])[[:space:]]+/\\c\n.if n $1$2\n.if t \\v'.3m'\\s-3$1\\s0\\v'-.3m'$2\n/g;
    s/\@sub\{([^}]+)}[[:space:]]+/\\c\n.if n $1\n.if t \\v'.3m'\\s-3$1\\s0\\v'-.3m'\n/g;
    s/\@sub\{([^}]+)}/\\c\n.if n $1\\c\n.if t \\v'.3m'\\s-3$1\\s0\\v'-.3m'\\c\n\\&/g;

    # opening and closing double quotes
    s/``(\S)/\\(lq$1/g;
    s/(\S)''/$1\\(rq/g;

    # ellipsis
    s/(\.\.\.)/\\&...\\&/g;
    s/\@dots\{}/\\&...\\&/g;
    # degree symbol
    s/\@textdegree\{}/\\(de/g;

    s/\@cite\{([^}]*)}/\@in_sgl_quotes\{$1}/g;
    s/\@url\{([^}]*)}/\@in_sgl_quotes\{$1}/g;
    s/\@email\{([^}]*)}/\@in_sgl_quotes\{$1}/g;

    s/\@dfn\{([^}]*)}/\@in_italics\{$1}/g;

    s/\@emph\{([^}]*)}/\@in_italics\{$1}/g;
    s/\@i\{([^}]*)}/\@in_italics\{$1}/g;
    s/\@r\{([^}]*)}/\@in_roman\{$1}/g;
    s/\@var\{([^}]*)}/\@in_italics\{$1}/g;

    s/\@b\{([^}]*)}/\@in_bold\{$1}/g;
    s/\@strong\{([^}]*)}/\@in_bold\{$1}/g;

    # remove trailing comma from xref because man won't include the page number
    s/\@xref\{([^}]*)},/\@xref\{$1}/g;
    s/\@xref\{([^}]*)}/See \@in_italics\{$1}/g;
    s/\@ref\{([^}]*)}/\@ref\{$1}/g;
    s/\@ref\{([^}]*)}/\@in_italics\{$1}/g;
    s/\@pxref\{([^}]*)}/see \@in_italics\{$1}/g;
    s/\@uref\{([^}]*)}/\@in_roman\{$1}/g;

    if (/\@chapter.*\@command/)
    {
	s/\@command\{([^}]*)}/\@in_italics\{$1}/g;
    }

    # show in constant-width font
    s/\@code\{([^}]*)}/\@constwid\{$1}/g;
    s/\@command\{([^}]*)}/\@in_italics\{$1}/g;
    s/\@env\{([^}]*)}/\@constwid\{$1}/g;

    # show in constant-width oblique font
    s/\@kbd\{([^}]*)}/\@constwidI\{$1}/g;

    # handle backslash character in Windows pathname
    # starts with a drive specifier ...
    s/(\@file\}]*)}/\@constwidQ\{$1}/g;
    if (/(\@file\{[[:alpha:]]:)/) {
	# don't change font switches or escaped spaces
	s/(\S)\\(?!(\s|f[RIBP]|f\([A-Z]{2}))/$1\\e/g;
    }
    if (/(\@file\{\@w\{[[:alpha:]]:)/) {
	# don't change font switches or escaped spaces
	s/(\S)\\(?!(\s|f[RIBP]|f\([A-Z]{2}))/$1\\e/g;
    }
    # handle backslash character in sample
    s/(\@samp\{[^}]*)\\/$1\\e/g;

    # Unicode in use
    s/\@U\{00D7}/\\(mu/g;
    s/\@U\{22C5}/\\(md/g;
    s/\@U\{00B7}/\\(pc/g;
    #s/\@U\{00F7}/\\(di/g;

    # general Unicode
    s/\@U\{([[:digit:]A-Za-z]{4})}/\\[u$1]/g;

    # prevent double hyphens in options from being converted to en dashes
    s/(\@option\{)--/$1-\\&-/g;

    # show in constant-width font with single quotes
    s/\@file\{([^}]*)}/\@constwidQ\{$1}/g;
    s/\@option\{([^}]*)}/\@constwidQ\{$1}/g;
    s/\@samp\{([^}]*)}/\@constwidQ\{$1}/g;

    s/\@sc\{([^}]*)}/\@to_upper\{$1}/g;

    s/\@key\{([^}]*)}/\@in_italics\{$1}/g;
    s/\@footnote\{([^}]*)}/\@in_square_br\{$1}/g;

    if (/\@w\{([^}]*)}/) {
	s/\@w\{([^}]*)}/\@no_break_word\{$1}/g;
    }

    # leave minus (dash) lists so they can be recognized later
    if (! /^\@itemize/) { s/\@minus\{}/\\-/g; }
    if ($in_comment == 1) {
      # ASCII (c) has no legal significance
      s/\@copyright\{} //g;
    }
    else {
      s/\@copyright\{}/\\(co/g;
    }
    s/\@noindent//;
    s/\@\{/{/g;
    s/\@}/}/g;
    s/\@\@/@/g;
    s/([^-])---/$1\\(em/g;

    # FIXME?
    # assume en dashes will be closed up to previous word
    #s/([^" ]+?)--/$1\\(en/g;
    # hack to handle en dash sample using `--' rather than @samp{--}
    s/`--'/\\(oq\\(en\\(cq/g;

    # allowable line break escape: groff only?
    s/\@\//\\:/g;
    s/^\@raggedright/.na/;
    s/^\@end raggedright/.ad b/;

    s/\@in_sgl_quotes\{([^}]+)}/`$1'/g;
    s/\@in_dbl_quotes\{([^}]+)}/\"$1\"/g;
    s/\@in_italics\{([^}]+)}/\\fI$1\\fP/g;
    s/\@in_roman\{([^}]+)}/\\fR$1\\fP/g;
    s/\@in_bold\{([^}]+)}/\\fB$1\\fP/g;
    s/\@to_upper\{([^}]*)}/\U$1\E/g;
    s/\@no_decoration\{([^}]*)}/$1/g;
    if (/\@no_break_word\{([^}]+)}(\S*)/) {
	$_ = no_break_word("$_", '@no_break_word');
    }
    s/\@no_break_space\{}/\\ /g;
    s/\@[ ]/ /g;
    s/\@in_angle_br\{([^}]*)}/<$1>/g;
    s/\@in_square_br\{([^}]*)}/[$1]/g;

    # convert constwid[IQ]* to inline troff escapes
    # try to prevent hyphenation in alphabetical examples
    s/\@constwid\{([[:alpha:]_][^}]*)}/\\%\\f(CW$1\\fR/g;
    s/\@constwidI\{([[:alpha:]_][^}]*)}/\\%\\f(CI$1\\fR/g;
    s/\@constwidQ\{([[:alpha:]_][^}]*)}/\\%\\(oq\\f(CW$1\\fR\\(cq/g;
    
    # '\%' doesn't seem to prevent hyphenation in nonalphabetical examples
    s/\@constwid\{([^}]*)}/\\f(CW$1\\fR/g;
    s/\@constwidI\{([^}]*)}/\\f(CI$1\\fR/g;
    s/\@constwidQ\{([^}]*)}/\\(oq\\f(CW$1\\fR\\(cq/g;

    if (/\@set codequotebacktick|\@codequotebacktick on/) {
      printf(".if n .tr `\\`\n");
      next;
    }
    if (/\@clear codequotebacktick|\@codequotebacktick off/) {
      printf(".ie .if '\*[.T]'utf8' .tr `\\(oq\n");
      printf(".el .if n .tr `'\n");
      printf(".tr '\\(cq\n");
      next;
    }

    if (/\@set codequoteundirected|\@codequoteundirected on/) {
      printf(".tr '\\(aq\n");
      next;
    }
    if (/\@clear codequoteundirected|\@codequoteundirected off/) {
      printf(".tr '\\(cq\n");
      next;
    }

    s/\@value\{([^\s]+)}/$value{$1}/eg;
    if (/\@set\s+([^\s]+)\s+(.*)$/) { $value{$1} = $2; next; }
    if (/\@clear\s+([^\s]+)\s+(.*)$/) { delete $value{$1}; next; }

    # tables of command-line options as used in units(1)
    if (/\@table (.*)/) { $intable = 1; next; }
    if (/\@end  *table/)
    {
	$intable = 0;
	if ($in_taggedlist == 1) { $in_taggedlist = 0; }
	next;
    }
    if ($intable == 1)
    {
	if (/\@itemx (.*)/)
	{
	    $tag = ".TP";
	    $samp = $1;
	    # add thin space to visually separate the dashes in roman type
	    $samp =~ s/--/\\-\\^\\-/;
	    $samp =~ s/-([[:alnum:]])/-\\^$1/;
	    if (!$diditem)
		{ printf("%s\n.BR \"$samp\"", $tag); }
	    else
		{ printf(" \", \" \"$samp\""); }
	    $diditem=1;
	    $new_paragraph = "";
	    next;
	}
	elsif ($diditem) { printf("\n"); $diditem=0; }
	if (/\@item (.*)/)
	{
	    $in_taggedlist = 1;
	    $tag = ".TP";
	    $samp = $1;
	    # add thin space to visually separate the dashes in roman type
	    $samp =~ s/--/\\-\\^\\-/;
	    $samp =~ s/-([[:alnum:]])/\\-\\^$1/;
	    printf("%s%s\n%s.BR \"$samp\"", $manprefix, $tag, $manprefix);
	    $diditem=1;
	    $new_paragraph = "";
	    next;
	}
    }
    # preserve hyphens
    s/ --(\w+)/ -\\&-$1/g;
    # handle remaining en dashes
    if ($in_comment == 0) {
      s/([^- ])--([^-])/$1\\(en$2/g;
      s/(\w+)--(\w)/$1\\(en$2/g;
    }
    else {
      s/([^- ])--([^-])/$1-$2/g;
    }

    # unordered list: bullet or minus
    if (/^\@itemize *$/ || /^\@itemize +@(bullet|minus)(\{})?/)
    {
	if ($1 =~ "minus") { $listmark = "\\-"; }
	else { $listmark = "\\(bu"; }
	$in_ulist = 1;
	$new_paragraph = "";
	next;
    }
    if ($in_ulist == 1 && /^\@end +itemize/) { $in_ulist = 0; next; }
    if ($in_ulist == 1)
    {
	if (/^\@item *$/) {
	  $tag = ".IP";
	  printf("%s%s \\h'1n'%s 4n\n", $manprefix, $tag, $listmark);
	  $new_paragraph = "";
	}
    }

    # Sections and subsections
    if (s/\@chapter (.*)/\U$1\E/)
    {
	if (/GNU FREE DOCUMENTATION/) { next; }
	$tag = ".SH";
	# restore proper case on font switches
	s/\\FR/\\fR/g;
	s/\\FI/\\f(BI/g;	# chapter headings (SH in man) are bold
	s/\\FP/\\fP/g;
	printf("%s\n", $SH_delim);
	printf("%s%s %s", $manprefix, $tag, $_);
	printf("%s\n", $SH_delim);
	$justdidparagraph=1;
	$new_paragraph = "";
	next;
    }
    if (s/\@section (.*)/$1/)
    {
	$tag = ".SS";
	printf("%s\n", $SS_delim);
	printf("%s%s %s", $manprefix, $tag, $_);
	printf("%s\n", $SS_delim);
	$justdidparagraph=1;
	$new_paragraph = "";
	next;
    }

    # FIXME? why do we need $manprefix for these?
    # input/output example macros
    if (/\@example/) {
      printf("%s.EX\n", $manprefix);
      $example=1;
      $new_paragraph = ".PP";	# EX macro does not provide spacing
      next;
    }
    if (/\@end example/) { printf("%s.EE\n", $manprefix); $example=0; $justdidparagraph=0; next; }

    if (/\@smallexample/) {
      printf("%s.EX\n", $manprefix);
      $example=1;
      $new_paragraph = ".PP";	# EX macro does not provide spacing
      # FIXME? reduce font size?
      next;
    }
    if (/\@end smallexample/) { printf("%s.EE\n", $manprefix); $example=0; $justdidparagraph=0; next; }

    # no CW font
    if (/\@display/) { printf("%s.RS 5n\n", $manprefix, $manprefix); $example=1; next; }
    if (/\@end display/) { printf("%s.RE\n", $manprefix, $manprefix); $example=0; next; }

    # no CW font, no indent
    if (/\@format/) { printf("%s.nf\n", $manprefix); $example=1; next; }
    if (/\@end format/) { printf("%s.fi\n", $manprefix); $example=0; next; }


    if ($example) { s/\\\s*$/\\e\n/ };

    if (/^\@/) { next; }


=ignore
    if ($new_paragraph)
    {
	printf("%s\n", $new_paragraph);
	$justdidparagraph = 1;
	$new_paragraph = "";
    }
=cut

    # blank line: new paragraph; don't output until we see if @item or @itemx follows
    if (!$example && /^\s*$/ && !$doman)
    {
	if ($justdidparagraph) {
	  $new_paragraph = "";
	  next;
	}
	if ($in_taggedlist == 1) {
	    $new_paragraph = ".IP";
	}
	else {
	    $new_paragraph = ".PP";
	}
	next;
    }
    if ($new_paragraph)
    {
	printf("%s\n", $new_paragraph);
	$justdidparagraph = 1;
	$new_paragraph = "";
    }

    # manprefix is either empty or holds comment string .\"
    if (! /^\s*$/) {
      printf("%s%s", $manprefix, $_);
    }

    if (!$doman) { $justdidparagraph=0; }
}

# Override a few default groff man settings. groff loads the man macro file
# after the call of TH, so these settings must likewise follow that call
# of TH.

sub add_extensions
{
    printf(".\\\"\n");
    printf(".\\\"------------------------------------------------------------------------\n");
    printf(".\\\" ensure that ASCII circumflex U+005E (^) and tilde U+007E (~)\n");
    printf(".\\\" are not remapped, so that example text can be copied and pasted\n");
    printf(".tr ^\\(ha\n");
    printf(".tr ~\\(ti\n");
    printf(".\\\" override translation in troffrc\n");
    printf(".ie .if '\\*[.T]'utf8' .tr `\\(oq'\\(cq\n");
    printf(".\\\" override mapping of ` to 60h with Tascii; assume\n");
    printf(".\\\" we don't need a backquote for an example\n");
    printf(".el .if n .tr `'\n");

    # bullet: use '*' rather than 'o' for ASCII/Latin1; override groff's
    # translation to MIDDLE DOT for others
    printf(".if n .tr \\(bu\*\n");
    printf(".\\\" override translation to MIDDLE DOT\n");
    printf(".if '\\*(.T'utf8' .tr \\(bu\\(bu\n");
    printf(".if '\\*(.T'cp1252' .tr \\(bu\\(bu\n");
    printf(".if '\\*(.T'ansi' .tr \\(bu\\(bu\n");
    printf(".\\\"------------------------------------------------------------------------\n");
    printf(".\\\"\n");
}

# convert all spaces within @w{...} to unbreakable
sub no_break_word
{
    my $line = shift;
    my $pattern = (shift) . "\{";
    my $len = length($pattern);
    my $ndx = -1;
    my $bracelevel = 0;
    my $char;

    while (($ndx = index($line, $pattern, $ndx)) > -1) {
	# get rid of the @ command and opening brace
	substr($line, $ndx, $len, '');
	$bracelevel = 1;
	while ($bracelevel > 0) {
	    $char = substr($line, $ndx, 1);
	    # end of line and braces not closed
	    if ($char eq "") {
		last;
	    }
	    elsif ($char eq '{') {
		$bracelevel++;
	    }
	    elsif ($char eq '}') {
		$bracelevel--;
	    }
	    # make spaces nonbreaking
	    if ($char eq ' ') {
		substr($line, $ndx++, 1, '\ ');
		$ndx++;
		# assume multiple spaces are not wanted
		while (substr($line, $ndx, 1) eq ' ') {
		    substr($line, $ndx, 1, '');
		}
		# catch a closing brace after a space
		if (substr($line, $ndx, 1) eq '}') {
		    $bracelevel--;
		}
	    }
	    $ndx++;
	}
	# get rid of the closing brace for the @ command. This should
	# always be true unless there's an internal brace mismatch ...
	if (substr($line, $ndx - 1, 1) eq '}' ) {
	    substr($line, $ndx - 1, 1, '');
	}
	else {
	    die "Missing closing '}'";
	}
    }

    return $line;
}
