# Common.pm: Common code of other Texinfo modules.
#
# Copyright 2010-2026 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License,
# or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
#
# Original author: Patrice Dumas
# Parts (also from Patrice Dumas) come from texi2html.pl or texi2html.init.
package Texinfo::Common;
# for unicode/layer support in binmode
# for binmode documented as pushing :utf8 on top of :encoding
use 5.008001;
use strict;
# have :alpha: match only ASCII
use if $] >= 5.014, re => '/a';
# To check if there is no erroneous autovivification
#no autovivification qw(fetch delete exists store strict);
# to determine the null file
use Config;
use File::Spec;
# for find_encoding, resolve_alias
use Encode;
# debugging
use Carp qw(cluck confess);
# uncomment to check that settable commands are contained in global commands
#use List::Compare;
use Locale::Messages;
use Texinfo::Report;
use Texinfo::Documentlanguages;
use Texinfo::Commands;
use Texinfo::Options;
use Texinfo::CommandsValues;
use Texinfo::TreeElement;
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(
collect_commands_in_tree
collect_commands_list_in_tree
valid_customization_option
valid_tree_transformation
);
# This is where the Texinfo modules get access to __( without explicit
# import.
our @EXPORT = qw(
__ __p
);
our $VERSION = '7.3';
# TODO document?
use constant {
MESSAGES_TEXTDOMAIN => 'texinfo',
DEFAULT_STRINGS_LANG => 'en',
};
# i18n
# For the messages translations.
sub __($) {
my $msgid = shift;
return Locale::Messages::dgettext(MESSAGES_TEXTDOMAIN, $msgid);
}
sub __p($$) {
my $context = shift;
my $msgid = shift;
return Locale::Messages::dpgettext(MESSAGES_TEXTDOMAIN, $context, $msgid);
}
# For the in-document output strings translations in the texinfo_document
# textdomain.
#
# Return the string but do nothing else, used to mark strings to be translated
# in the Gettext framework.
# Can be used in other modules.
sub gdt($) {
return $_[0];
}
# First argument is the translation context.
sub pgdt($$) {
return $_[1];
}
# to be used when the context needs to be returned too
sub pgdt_context($$) {
return [$_[0], $_[1]];
}
# determine the null device
my $default_null_device = File::Spec->devnull();
# Used in many converters and init files
our %null_device_file = (
$default_null_device => 1
);
# equivalence between a @set flag and an @@-command
our %set_flag_command_equivalent = (
'txicodequoteundirected' => 'codequoteundirected',
'txicodequotebacktick' => 'codequotebacktick',
# 'txideftypefnnl' => 'deftypefnnewline',
);
our %command_equivalent_set_flag;
foreach my $flag (keys(%set_flag_command_equivalent)) {
$command_equivalent_set_flag{$set_flag_command_equivalent{$flag}} = $flag;
}
# Customization options
# used in main program too, not documented
our %non_decoded_customization_variables;
foreach my $variable_name ('MACRO_EXPAND', 'INTERNAL_LINKS') {
$non_decoded_customization_variables{$variable_name} = 1;
}
# can be modified through command-line, but not customization options
my %parser_document_state_configuration = (
# parsed document parsing information still relevant after parsing
'values' => {'txicommandconditionals' => 1},
# the key is the name, the value the @set name
# argument.
# The txicommandconditionals is a special value
# that is set to mark that @ifcommandnotdefined
# is implemented
);
# parser keys related to customization
# Set when initializing a parser, but never from command-line/init files
my %parser_inner_options = (
'accept_internalvalue' => 0, # whether @txiinternalvalue should be added
# to the tree or considered invalid.
# currently set if called by gdt.
);
# this serves both to set defaults and list customization variable
# valid for the parser.
my $common_parser_regular_options_defaults
= Texinfo::Options::get_regular_options('common_parser');
# used in main program, not documented
our %default_parser_customization_values = (
%Texinfo::Options::parser_options,
%$common_parser_regular_options_defaults);
# used in Parser, not documented
our %parser_document_parsing_options = (
%default_parser_customization_values,
%parser_document_state_configuration,
%parser_inner_options);
# check that settable commands are contained in global commands
# from command_data.txt
if (0) {
#if (1) {
my @global_unique_settable = keys(%Texinfo::Options::unique_at_command_options);
my @global_unique_commands = keys(%Texinfo::Commands::global_unique_commands);
my $lcu = List::Compare->new(\@global_unique_settable, \@global_unique_commands);
# only in the first list
if (scalar($lcu->get_unique)) {
warn 'BUG: Unique settable not global: '.join(',',$lcu->get_unique)."\n";
}
if (scalar($lcu->get_complement)) {
print STDERR "REMARK: global_uniq commands not settable: "
.join(',', $lcu->get_complement)."\n";
}
my @global_multi_settable = keys(%Texinfo::Options::multiple_at_command_options);
my @global_multi_commands = keys(%Texinfo::Commands::global_commands);
my $lcm = List::Compare->new(\@global_multi_settable, \@global_multi_commands);
if (scalar($lcm->get_unique)) {
warn 'BUG: Multi settable not global: '.join(',',$lcm->get_unique)."\n";
}
if (scalar($lcm->get_complement)) {
print STDERR "REMARK: global commands not settable: "
.join(',', $lcm->get_complement)."\n";
}
}
# defaults for the main program.
# In general transmitted to converters as defaults.
# Not used in converters, not documented
our %default_main_program_customization_options = (
%Texinfo::Options::program_cmdline_options,
%Texinfo::Options::program_customization_options);
# used in converters
our %document_settable_at_commands
= (%Texinfo::Options::multiple_at_command_options,
%Texinfo::Options::unique_at_command_options);
my %valid_customization_options;
foreach my $var (keys(%document_settable_at_commands),
keys(%Texinfo::Options::program_cmdline_options),
keys(%Texinfo::Options::converter_cmdline_options),
keys(%Texinfo::Options::program_customization_options),
keys(%Texinfo::Options::parser_options),
keys(%Texinfo::Options::converter_customization_options),
keys(%Texinfo::Options::converter_other_options),
keys(%Texinfo::Options::array_cmdline_options)) {
$valid_customization_options{$var} = 1;
}
sub valid_customization_option($) {
my $option = shift;
return $valid_customization_options{$option};
}
# Output formats
# used in main program
our %texinfo_output_formats;
foreach my $output_format_command ('info', 'plaintext',
grep {$Texinfo::Commands::block_commands{$_} eq 'format_raw'}
keys(%Texinfo::Commands::block_commands)) {
$texinfo_output_formats{$output_format_command} = $output_format_command;
}
# constants. Set by the main program.
my %build_constants;
# should only be called from the main program, not documented
sub set_build_constant($$) {
my ($key, $value) = @_;
$build_constants{$key} = $value;
}
sub get_build_constant($) {
my $key = shift;
return $build_constants{$key};
}
# Tree transformations
my %valid_tree_transformations;
foreach my $valid_transformation (
'complete_tree_nodes_menus',
'complete_tree_nodes_missing_menu',
'fill_gaps_in_sectioning',
'insert_nodes_for_sectioning_commands',
'move_index_entries_after_items',
'regenerate_master_menu',
'relate_index_entries_to_table_items') {
$valid_tree_transformations{$valid_transformation} = 1;
}
sub valid_tree_transformation($) {
my $transformation = shift;
return 1 if (defined($transformation)
and exists($valid_tree_transformations{$transformation}));
return 0;
}
# information on encodings
# in Texinfo up to 5.2, we presume that ISO-8859-1 was considered as
# the default encoding (although it had never been said explicitly in
# the manual, it is consistent with HTML output without encoding
# being the default for makeinfo output in earlier versions and being,
# at that time, considered as ISO-8859-1). The wording in the Texinfo
# manual implied that setting US-ASCII had no effect, a possible
# interpretation being that it was an alias for ISO-8859-1. Since
# ISO-8859-1 extends US-ASCII in a compatible way, this interpretation
# is valid. Also, as long as the same 8bit encoding is used for input and
# output, the precise 8bit encoding used to extend US-ASCII has no
# practical consequence, something consistent with past makeinfo supporting
# any 8bit encoding without documentencoding and also when US-ASCII was
# specified as encoding.
#
# To support old manuals in which US-ASCII can be specified although
# the encoding corresponds to any 8bit encoding compatible with ISO-8859-1,
# we convert US-ASCII as ISO-8859-1 to avoid errors for characters in
# ISO-8859-1 but not in US-ASCII.
# Used in Parser, not documented in Pod nor in the manual on purpose, to
# be considered as not guaranteed backward compatibility.
our %encoding_name_conversion_map;
%encoding_name_conversion_map = (
'us-ascii' => 'iso-8859-1',
# The mapping to utf-8 is important for perl code, as it means using a strict
# conversion to utf-8 and not a lax conversion:
# https://perldoc.perl.org/perlunifaq#What's-the-difference-between-UTF-8-and-utf8?
# In more detail, we want to use utf-8 only for two different reasons
# 1) if input is malformed it is better to error out it as soon as possible
# 2) we do not want to have different behaviour and hard to find bugs
# depending on whether the user used @documentencoding utf-8
# or @documentencoding utf8. There is a warning with utf8, but
# we want to be clear in any case.
'utf8' => 'utf-8',
);
# information on @-commands
# used in Parser, not documented
our %def_map = (
# basic commands.
# 'arg' and 'argtype' are for everything appearing after the other
# arguments.
'defline', [ 'category', 'name', 'arg' ],
'deftypeline', [ 'category', 'type', 'name', 'argtype' ],
'deffn', [ 'category', 'name', 'arg' ],
'defvr', [ 'category', 'name' ],
'deftypefn', [ 'category', 'type', 'name', 'argtype' ],
'deftypeop', [ 'category', 'class' , 'type', 'name', 'argtype' ],
'deftypevr', [ 'category', 'type', 'name' ],
'defcv', [ 'category', 'class' , 'name' ],
'deftypecv', [ 'category', 'class' , 'type', 'name' ],
'defop', [ 'category', 'class' , 'name', 'arg' ],
'deftp', [ 'category', 'name', 'argtype' ],
# shortcuts
# The strings are marked to be translated in the parsers in type
# 'untranslated_def_line_arg'.
'defun', {'deffn' => pgdt_context(
'category of functions for @defun',
'Function')},
# TRANSLATORS: category of macros for @defmac
'defmac', {'deffn' => gdt('Macro')},
# TRANSLATORS: category of special forms for @defspec
'defspec', {'deffn' => gdt('Special Form')},
'defvar', {'defvr' => pgdt_context(
'category of variables for @defvar',
'Variable')},
# TRANSLATORS: category of user-modifiable options for @defopt
'defopt', {'defvr' => gdt('User Option')},
'deftypefun', {'deftypefn' => pgdt_context(
'category of functions for @deftypefun',
'Function')},
'deftypevar', {'deftypevr' => pgdt_context(
'category of variables in typed languages for @deftypevar',
'Variable')},
'defivar', {'defcv' => pgdt_context(
'category of instance variables in object-oriented programming for @defivar',
'Instance Variable')},
'deftypeivar', {'deftypecv' => pgdt_context(
'category of instance variables with data type in object-oriented programming for @deftypeivar',
'Instance Variable')},
'defmethod', {'defop' => pgdt_context(
'category of methods in object-oriented programming for @defmethod',
'Method')},
'deftypemethod', {'deftypeop' => pgdt_context(
'category of methods with data type in object-oriented programming for @deftypemethod',
'Method')},
);
# Argument not metasyntactic variables only.
our %def_no_var_arg_commands;
our %def_aliases;
foreach my $def_command(keys %def_map) {
if (ref($def_map{$def_command}) eq 'HASH') {
my ($real_command) = keys(%{$def_map{$def_command}});
$def_aliases{$def_command} = $real_command;
$def_aliases{$def_command.'x'} = $real_command.'x';
}
$def_no_var_arg_commands{$def_command} = 1 if ($def_command =~ /^deftype/);
}
our %small_block_associated_command;
for my $cmd ('example', 'display', 'format', 'lisp', 'quotation',
'indentedblock') {
$small_block_associated_command{'small'.$cmd} = $cmd;
};
my %command_structuring_level
= %Texinfo::CommandsValues::command_structuring_level;
# Not directly used in converters, not documented
our %level_to_structuring_command;
{
my $sections = [ ];
my $appendices = [ ];
my $unnumbered = [ ];
my $headings = [ ];
# set levels for synonyms
$level_to_structuring_command{'appendixsection'} = $appendices;
$level_to_structuring_command{'majorheading'} = $headings;
$level_to_structuring_command{'centerchap'} = $unnumbered;
foreach my $command (keys (%command_structuring_level)) {
next if (exists($level_to_structuring_command{$command}));
if ($command =~ /^appendix/) {
$level_to_structuring_command{$command} = $appendices;
} elsif ($command =~ /^unnumbered/ or $command eq 'top') {
$level_to_structuring_command{$command} = $unnumbered;
} elsif ($command =~ /section$/ or $command eq 'chapter') {
$level_to_structuring_command{$command} = $sections;
} else {
# no mapping for part, it is outside of the main hierarchy
next if ($command eq 'part');
$level_to_structuring_command{$command} = $headings;
}
my $command_level = $command_structuring_level{$command};
if (defined($level_to_structuring_command{$command}->[$command_level])) {
die "$command: level_to_structuring_command already set to "
.$level_to_structuring_command{$command}->[$command_level]."\n";
}
$level_to_structuring_command{$command}->[$command_level]
= $command;
}
}
# %all_commands includes user-settable commands only.
# The internal commands are not in %all_commands.
# used in util/txicmdlist and in a check that needs to be manually
# enabled in HTML code
our %all_commands;
foreach my $command (
keys(%Texinfo::Commands::block_commands),
keys(%Texinfo::Commands::brace_commands),
keys(%Texinfo::Commands::line_commands),
keys(%Texinfo::Commands::nobrace_commands),
) {
$all_commands{$command} = 1;
}
foreach my $internal_command (keys(%Texinfo::Commands::internal_commands)) {
delete $all_commands{$internal_command};
}
# functions used in main program, Parser and/or Texinfo::Structuring.
# Not supposed to be called in user-defined code.
# ALTIMP C/main/utils.c analyze_documentlanguage_argument (no message)
# for Parser and main program
sub warn_unknown_language($) {
my $lang = shift;
my @messages = ();
my $lang_code = $lang;
my $region_code;
if ($lang =~ /^([a-z]+)_([A-Z]+)$/) {
$lang_code = $1;
$region_code = $2;
}
if (! $Texinfo::Documentlanguages::language_codes{$lang_code}) {
push @messages, sprintf(__("%s is not a valid language code"),
$lang_code);
}
if (defined($region_code)
and ! $Texinfo::Documentlanguages::region_codes{$region_code}) {
push @messages, sprintf(__("%s is not a valid region code"),
$region_code);
}
return @messages;
}
# next functions are for code used in Structuring or Indices in addition
# to Parser. Also possibly used in Texinfo::Transformations.
# ALTIMP C/main/utils.c
sub ultimate_index($$) {
my ($indices_information, $index) = @_;
while (exists($index->{'merged_in'})) {
$index = $indices_information->{$index->{'merged_in'}};
}
return $index;
}
# ALTIMP C/main/manipulate_tree.c
# relocate $SOURCE_MARKS source marks with position between
# $BEGIN_POSITION and $BEGIN_POSITION + $ADDED_LEN to be relative to
# $BEGIN_POSITION, and move to element $E.
# return $BEGIN_POSITION + $ADDED_LEN if there were source marks
sub relocate_source_marks($$$$) {
my $source_marks = shift;
return undef if (!defined($source_marks));
my ($e, $begin_position, $added_len) = @_;
my $end_position = $begin_position + $added_len;
my @indices_to_remove;
# collect source marks to remove starting from the beginning to keep
# the correct order in the $e element. Order indices to remove
# in the reverse order to start from the last in order not to change
# the array order when the entry is splice'd away.
for (my $i = 0; $i < scalar(@$source_marks); $i++) {
my $source_mark = $source_marks->[$i];
if (($begin_position == 0
and (!exists($source_mark->{'position'})
# this should never happen
or $source_mark->{'position'} == 0))
or (exists($source_mark->{'position'})
and $source_mark->{'position'} > $begin_position
and $source_mark->{'position'} <= $end_position)) {
unshift @indices_to_remove, $i;
if (exists($e->{'text'})) {
if ($source_mark->{'position'}) {
$source_mark->{'position'}
= $source_mark->{'position'} - $begin_position;
} elsif ($begin_position) {
warn "BUG: no $source_mark->{'position'} but $begin_position\n";
}
} else {
# if the source mark is to be added to a command, it can only be right
# after the command. The current use case is a symbol with a source
# mark after the symbol replaced by an @-command, so we are in
# the case of $added_length = 1 and
# $source_mark->{'position'} == $end_position
if ($source_mark->{'position'}
and $source_mark->{'position'} - $begin_position > 1) {
warn "BUG? after command $source_mark->{'position'} way past $begin_position\n";
}
delete $source_mark->{'position'};
}
$e->{'source_marks'} = [] if (! exists($e->{'source_marks'}));
push @{$e->{'source_marks'}}, $source_mark;
} elsif (exists($source_marks->[$i]->{'position'})
and $source_marks->[$i]->{'position'} > $end_position) {
# only correct if positions are always monotonically increasing
# but should be the case for now
last;
}
}
foreach my $i (@indices_to_remove) {
splice (@$source_marks, $i, 1);
}
return $end_position;
}
sub _find_end_brace($$) {
my ($text, $braces_count) = @_;
my $before = '';
while ($braces_count > 0 and length($text)) {
if ($text =~ s/([^()]*)([()])//) {
$before .= $1.$2;
my $brace = $2;
if ($brace eq '(') {
$braces_count++;
} else {
$braces_count--;
if ($braces_count == 0) {
return ($before, $text, 0);
}
}
} else {
$before .= $text;
$text = '';
}
}
return ($before, undef, $braces_count);
}
# This only counts opening braces, and returns 0 once all the parentheses
# are closed
sub _count_opened_tree_braces($$);
sub _count_opened_tree_braces($$) {
my ($current, $braces_count) = @_;
if (defined($current->{'text'})) {
my ($before, $after);
($before, $after, $braces_count) = _find_end_brace($current->{'text'},
$braces_count);
}
return $braces_count;
}
# ALTIMP C/main/manipulate_tree.c
# retrieve a leading manual name in parentheses, if there is one.
# $LABEL_CONTENTS_CONTAINER->{'contents'} is the Texinfo for the specification
# of a node. It is relevant in any situation when a label is expected,
# @node, menu entry, float, anchor... For the @node command, for instance,
# $LABEL_CONTENTS_CONTAINER is typically
# $node->{'contents'}->[0]->{'contents'}->[0].
#
# Returned object is a hash with two fields:
#
# manual_content - Texinfo tree for a manual name extracted from the
# node specification.
# node_content - Texinfo tree for the node name on its own
#
# If $MODIFY_TREE is set and manual name is retrieved, modify the
# $LABEL_CONTENTS_CONTAINER contents such that the parentheses, the manual
# part and the node part are not in the same elements in the contents,
# and use the corresponding elements in the returned object.
#
# Could be documented, but only if there is evidence that this function
# is useful in user-defined code.
sub parse_node_manual($;$) {
my ($label_contents_container, $modify_node) = @_;
return undef if (!exists($label_contents_container->{'contents'}));
my $contents = $label_contents_container->{'contents'};
my $idx = 0;
my $manual;
my $result;
my $node_content = [];
if (exists($contents->[0]->{'text'}) and $contents->[0]->{'text'} =~ /^\(/) {
my ($new_first, $opening_brace);
$manual = [];
my $braces_count = 1; # Number of ( seen minus number of ) seen.
# the leading ( from @$contents is not in manual.
# If the first contents element is "(" followed by more text, split
# the leading "(" into its own element.
my $first = $contents->[0];
if ($first->{'text'} ne '(') {
if ($modify_node) {
$opening_brace
= Texinfo::TreeElement::new({'text' => '('});
}
my $brace_text = $first->{'text'};
$brace_text =~ s/^\(//;
$new_first = Texinfo::TreeElement::new({'text' => $brace_text});
} else {
# first element is "(", it is not part of the manual, keep it
$idx++;
}
for (; $idx < scalar(@$contents); $idx++) {
my $content;
if ($idx == 0) {
$content = $new_first;
} else {
$content = $contents->[$idx];
}
if (!exists($content->{'text'}) or $content->{'text'} !~ /\)/) {
push @$manual, $content;
$braces_count = _count_opened_tree_braces($content, $braces_count);
# This is an error, braces were closed in a command
if ($braces_count == 0) {
last;
}
} else {
my ($before, $after);
($before, $after, $braces_count) = _find_end_brace($content->{'text'},
$braces_count);
if ($braces_count == 0) {
my $current_position = 0;
# At this point, we are sure that there is a manual part,
# so the pending removal/addition of elements at the beginning
# of the manual can proceed (if modify_node).
if ($modify_node) {
if ($opening_brace) {
# remove the original $first first element and prepend the
# split "(" and text elements
shift @$contents;
unshift @$contents, $new_first;
unshift @$contents, $opening_brace;
$idx++;
if (exists($first->{'source_marks'})) {
my $current_position = relocate_source_marks(
$first->{'source_marks'}, $opening_brace,
0, length($opening_brace->{'text'}));
relocate_source_marks($first->{'source_marks'}, $new_first,
$current_position, length($new_first->{'text'}));
}
}
# Remove current element $content with closing brace from the tree.
splice(@$contents, $idx, 1);
}
# remove the closing ), it is not in manual_content
$before =~ s/(\))$//;
if ($before ne '') {
# text before ), part of the manual name
my $last_manual_element
= Texinfo::TreeElement::new({ 'text' => $before });
push @$manual, $last_manual_element;
if ($modify_node) {
splice(@$contents, $idx, 0, $last_manual_element);
$idx++;
$current_position = relocate_source_marks(
$content->{'source_marks'}, $last_manual_element,
$current_position, length($before));
}
}
if ($modify_node) {
my $closing_brace
= Texinfo::TreeElement::new({'text' => ')'});
splice(@$contents, $idx, 0, $closing_brace);
$idx++;
$current_position = relocate_source_marks(
$content->{'source_marks'}, $closing_brace,
$current_position, length($closing_brace->{'text'}));
}
$after =~ s/^(\s*)//;
my $spaces_after = $1;
if ($spaces_after and $modify_node) {
my $spaces_element
= Texinfo::TreeElement::new({'text' => $spaces_after,});
splice(@$contents, $idx, 0, $spaces_element);
$idx++;
$current_position = relocate_source_marks(
$content->{'source_marks'}, $spaces_element,
$current_position, length($spaces_after));
}
if ($after ne '') {
# text after ), part of the node name.
my $leading_node_content
= Texinfo::TreeElement::new({'text' => $after});
push @$node_content, $leading_node_content;
if ($modify_node) {
splice(@$contents, $idx, 0, $leading_node_content);
$current_position = relocate_source_marks(
$content->{'source_marks'}, $leading_node_content,
$current_position, length($after));
}
$idx++;
}
last;
} else {
push @$manual, $content;
}
}
}
if ($braces_count != 0) {
# unclosed brace, reset
$manual = undef;
$idx = 0;
} else {
$result = {};
$result->{'manual_content'}
= Texinfo::TreeElement::new({'contents' => $manual});
}
}
if ($idx < scalar(@$contents)) {
push(@$node_content, @$contents[$idx .. scalar(@$contents)-1]);
}
if (scalar(@$node_content)) {
$result = Texinfo::TreeElement::new({}) if (!defined($result));
$result->{'node_content'}
= Texinfo::TreeElement::new({'contents' => $node_content});
}
return $result;
}
# misc functions used in diverse contexts and useful in converters
# Wrapper around File::Spec->canonpath that replace backslashes by
# forward slashes.
sub file_separator_canonpath($) {
my $dir = shift;
my $canon_dir = File::Spec->canonpath($dir);
# normalize to forward slash, as we use forward slashes as much as
# possible in the code.
if ($^O eq 'MSWin32') {
my @dir_parts = File::Spec->splitdir($canon_dir);
return join('/', @dir_parts);
} else {
return $canon_dir;
}
}
# ALTIMP C/convert/converter.c
# Used in converters
# find the accent commands stack and the innermost text contents
sub find_innermost_accent_contents($) {
my $current = shift;
my @accent_commands = ();
my $debug = 0;
ACCENT:
while (1) {
my $current_cmdname = $current->{'cmdname'};
# the following can happen if called with a bad tree
if (!defined($current_cmdname)
or !exists($Texinfo::Commands::accent_commands{$current_cmdname})) {
#print STDERR "BUG: Not an accent command in accent\n";
cluck "BUG: Not an accent command in accent\n";
#print STDERR Texinfo::Convert::Texinfo::convert_to_texinfo($current)."\n";
#print STDERR Data::Dumper->Dump([$current]);
return (undef, \@accent_commands);
}
push @accent_commands, $current;
# A bogus accent, that may happen
if (!exists($current->{'contents'})) {
return (undef, \@accent_commands);
}
my $arg = $current->{'contents'}->[0];
if (!exists($arg->{'contents'})) {
return (undef, \@accent_commands);
}
# inside the argument of an accent
my $text_contents = [];
foreach my $content (@{$arg->{'contents'}}) {
if (exists($content->{'cmdname'})) {
my $cmdname = $content->{'cmdname'};
if (exists($Texinfo::Commands::accent_commands{$cmdname})) {
# if outer accent is tieaccent, keep accent inside and do not try to
# nest more
if ($current_cmdname ne 'tieaccent') {
$current = $content;
next ACCENT;
}
} elsif ($cmdname eq 'c' or $cmdname eq 'comment') {
next;
}
}
push @$text_contents, $content;
}
# we go here if there was no nested accent
my $element_inside
= Texinfo::TreeElement::new({'contents' => $text_contents});
return ($element_inside, \@accent_commands);
}
}
# ALTIMP C/main/utils.c
# TODO document
# Used in converters
sub multitable_columnfractions($) {
my $multitable = shift;
my $arguments_line = $multitable->{'contents'}->[0];
my $block_line_arg = $arguments_line->{'contents'}->[0];
my $columnfractions;
if (exists($block_line_arg->{'contents'})
and $block_line_arg->{'contents'}->[0]->{'cmdname'}
and $block_line_arg->{'contents'}->[0]->{'cmdname'}
eq 'columnfractions') {
$columnfractions = $block_line_arg->{'contents'}->[0];
}
return $columnfractions;
}
# ALTIMP C/main/utils.c
# there is a command as argument for a block command (@itemize or
# @table, @vtable...) if there is only one argument on the line,
# it is a brace command but not an accent command and it is empty.
sub block_line_argument_command($) {
my $block_line_arg = shift;
if (exists($block_line_arg->{'contents'})
and scalar(@{$block_line_arg->{'contents'}}) == 1) {
my $arg = $block_line_arg->{'contents'}->[0];
if (exists($arg->{'cmdname'})
and (!exists($arg->{'contents'})
or (scalar(@{$arg->{'contents'}}) == 1
and !exists($arg->{'contents'}->[0]->{'contents'})))) {
my $cmdname = $arg->{'cmdname'};
if ((exists($Texinfo::Commands::brace_commands{$cmdname})
and !exists($Texinfo::Commands::accent_commands{$cmdname}))
or (exists($arg->{'type'})
and $arg->{'type'} eq 'definfoenclose_command')) {
return $arg;
}
}
}
return undef;
}
my $default_bullet_command = Texinfo::TreeElement::new({'cmdname' => 'bullet'});
# ALTIMP C/main/convert_utils.c
sub itemize_line_prepended_element($) {
my $block_line_arg = shift;
my $arg = block_line_argument_command($block_line_arg);
if (defined($arg)) {
return $arg;
} elsif (!exists($block_line_arg->{'contents'})) {
return $default_bullet_command;
} else {
return $block_line_arg;
}
}
# ALTIMP main/convert_utils.c
sub item_itemize_prepended($) {
my $element = shift;
# parent line arguments_line type element
my $arguments_line = $element->{'parent'}->{'contents'}->[0];
my $block_line_arg = $arguments_line->{'contents'}->[0];
return itemize_line_prepended_element($block_line_arg);
}
# ALTIMP C/main/convert_utils.c
sub item_line_block_line_argument_command($) {
my $block_line_arg = shift;
my $arg = block_line_argument_command($block_line_arg);
if (defined($arg)) {
my $brace_category = $Texinfo::Commands::brace_commands{$arg->{'cmdname'}};
# $Texinfo::Commands::brace_commands{} is undef
# for definfoenclose'd commands
if (defined($brace_category) and $brace_category eq 'noarg') {
$arg = undef;
}
}
return $arg;
}
my $default_asis_command = Texinfo::TreeElement::new({'cmdname' => 'asis'});
# ALTIMP C/main/convert_utils.c
# always return something
sub block_item_line_command($) {
my $block_line_arg = shift;
my $arg = item_line_block_line_argument_command($block_line_arg);
if (!defined($arg)) {
$arg = $default_asis_command;
}
return $arg;
}
# ALTIMP C/main/utils.c
sub find_float_caption_shortcaption($) {
my $float = shift;
my $caption;
my $shortcaption;
foreach my $content (@{$float->{'contents'}}) {
if (exists($content->{'cmdname'})) {
if ($content->{'cmdname'} eq 'caption') {
$caption = $content unless($caption);
last if $shortcaption;
} elsif ($content->{'cmdname'} eq 'shortcaption') {
$shortcaption = $content unless($shortcaption);
last if $caption;
}
}
}
return ($caption, $shortcaption);
}
# ALTIMP C/main/utils.c
# TODO document
# used in Texinfo::Indices and converters
sub collect_subentries($$);
sub collect_subentries($$) {
my ($current, $subentries) = @_;
my $line_arg = $current->{'contents'}->[0];
foreach my $content (@{$line_arg->{'contents'}}) {
if (exists($content->{'cmdname'})
and $content->{'cmdname'} eq 'subentry') {
push @$subentries, $content;
collect_subentries($content, $subentries);
}
}
}
sub index_entry_referred_entry($$);
# ALTIMP C/main/utils.c
# TODO document
# Used in converters
sub index_entry_referred_entry($$) {
my ($element, $referred_cmdname) = @_;
my $line_arg = $element->{'contents'}->[0];
if (exists($line_arg->{'contents'})) {
foreach my $content (@{$line_arg->{'contents'}}) {
if (exists($content->{'cmdname'})) {
if ($content->{'cmdname'} eq $referred_cmdname) {
return $content->{'contents'}->[0]
if (exists($content->{'contents'}));
} elsif ($content->{'cmdname'} eq 'subentry') {
return index_entry_referred_entry($content, $referred_cmdname);
}
}
}
}
return undef;
}
# ALTIMP C/main/utils.c
# Used in main program, tests and HTML Converter.
# TODO document?
#
# $INPUT_FILE_PATH: file name to locate. Binary string. If it is a file
# path, it is not searched for in directories.
# $DIRECTORIES: a reference on a array containing a list of directories to
# search the file in. Binary strings.
# $ALL_FILES: if true collect all the files with that name, otherwise stop
# at first match.
# $DEPRECATED_DIRS: optional argument. If set, associates deprecated directory
# names with the directory name that should be used
#
# Return an array reference with the files found, only one if $ALL_FILES
# is false, and an array reference with the deprecated directories used.
sub locate_file_in_dirs($$$;$) {
my ($input_file_path, $directories, $all_files, $deprecated_dirs) = @_;
my $deprecated_dirs_used;
if (File::Spec->file_name_is_absolute($input_file_path)) {
return ([$input_file_path], undef)
if (-e $input_file_path and -r $input_file_path);
} else {
my ($volume, $path_directories, $file)
= File::Spec->splitpath($input_file_path);
my @path_directories = File::Spec->splitdir($path_directories);
if (scalar(@path_directories) > 0) {
# do not search in directories if the file name already contains
# directories.
return ([$input_file_path], undef)
if (-e $input_file_path and -r $input_file_path);
} else {
my @files;
foreach my $dir (@$directories) {
next unless (-d $dir);
my $possible_file = "$dir/$input_file_path";
if (-e $possible_file and -r $possible_file) {
if ($deprecated_dirs and $deprecated_dirs->{$dir}) {
$deprecated_dirs_used = [] if (!defined($deprecated_dirs_used));
push @$deprecated_dirs_used, $dir;
}
if ($all_files) {
push (@files, $possible_file);
} else {
return [$possible_file], $deprecated_dirs_used;
}
}
}
return (\@files, $deprecated_dirs_used)
if ($all_files and scalar(@files));
}
}
return undef, undef;
}
sub processing_output_encoding($) {
my $encoding = shift;
my $perl_encoding;
if (defined($encoding) and $encoding ne '') {
$encoding = $encoding_name_conversion_map{$encoding}
if (defined($encoding_name_conversion_map{$encoding}));
my $Encode_encoding_object = Encode::find_encoding($encoding);
if (defined($Encode_encoding_object)) {
$perl_encoding = $Encode_encoding_object->name();
$perl_encoding = undef if (defined($perl_encoding)
and $perl_encoding eq '');
}
}
return $perl_encoding;
}
sub associated_processing_encoding($) {
my $element = shift;
my $encoding = $element->{'extra'}->{'input_encoding_name'}
if (exists($element->{'extra'}));
return processing_output_encoding($encoding);
}
# Reverse the decoding of the file name from the input encoding. When
# dealing with file names, we want Perl strings representing sequences of
# bytes, not Unicode codepoints.
# This is necessary even if the name of the included file is purely
# ASCII, as the name of the directory it is located within may contain
# non-ASCII characters.
# Otherwise, the -e operator and similar may not work correctly.
sub encode_file_name($$) {
my ($file_name, $input_encoding) = @_;
my $encoding;
return ($file_name, $encoding)
if (not defined($input_encoding));
if ($input_encoding eq 'utf-8' or $input_encoding eq 'utf-8-strict') {
$encoding = 'utf-8';
} else {
$encoding = $input_encoding;
}
my $encoded_file_name = Encode::encode($encoding, $file_name);
return ($encoded_file_name, $encoding);
}
# ALTIMP C/main/utils.c
sub locate_include_file($;$) {
my ($input_file_path, $include_directories) = @_;
my $ignore_include_directories = 0;
# If the path is absolute or begins with . or .., do not search in
# include directories. This is consistent with Kpathsea for Texinfo TeX.
if (File::Spec->file_name_is_absolute($input_file_path)) {
$ignore_include_directories = 1;
} else {
my ($volume, $directories, $filename)
= File::Spec->splitpath($input_file_path);
my @directories = File::Spec->splitdir($directories);
foreach my $dir (@directories) {
if ($dir eq File::Spec->updir() or $dir eq File::Spec->curdir()) {
$ignore_include_directories = 1;
last;
} elsif ($dir ne '') {
last;
}
}
}
if ($ignore_include_directories) {
if (-e $input_file_path and -r $input_file_path) {
return $input_file_path;
}
} else {
if (!$include_directories) {
# no directory list and not an absolute path, never succeed
return undef;
}
foreach my $include_dir (@$include_directories) {
my $possible_file = "$include_dir/$input_file_path";
if (-e $possible_file and -r $possible_file) {
return $possible_file;
}
}
}
return undef;
}
# ALTIMP C/main/utils.c
# TODO document?
sub informative_command_value($) {
my $element = shift;
my ($cmdname, $value) = element_value_equivalent($element);
if (defined($cmdname)) {
return ($cmdname, $value);
}
$cmdname = $element->{'cmdname'};
$cmdname = 'shortcontents' if ($cmdname eq 'summarycontents');
if ($Texinfo::Commands::line_commands{$cmdname} eq 'lineraw') {
if (not $Texinfo::Commands::commands_args_number{$cmdname}) {
return $cmdname, 1;
} elsif (exists($element->{'contents'})) {
return $cmdname, join(' ', map {$_->{'text'}} @{$element->{'contents'}});
}
} elsif (exists($element->{'extra'})
and exists($element->{'extra'}->{'text_arg'})) {
return $cmdname, $element->{'extra'}->{'text_arg'};
} elsif (exists($element->{'extra'})
and exists($element->{'extra'}->{'misc_args'})
and exists($element->{'extra'}->{'misc_args'}->[0])) {
return $cmdname, $element->{'extra'}->{'misc_args'}->[0];
} elsif ($Texinfo::Commands::line_commands{$cmdname} eq 'line'
and exists($element->{'contents'}->[0]->{'contents'})
and exists($element->{'contents'}->[0]->{'contents'}->[0]->{'text'})) {
return $cmdname, $element->{'contents'}->[0]->{'contents'}->[0]->{'text'};
}
return undef, undef;
}
# ALTIMP C/main/utils.c
# Handle @set txicodequoteundirected as an
# alternative to @codequoteundirected.
sub element_value_equivalent($) {
my $element = shift;
my $cmdname = $element->{'cmdname'};
if ($cmdname eq 'set' or $cmdname eq 'clear') {
my $misc_args;
if (exists($element->{'extra'})
and exists($element->{'extra'}->{'misc_args'})) {
$misc_args = $element->{'extra'}->{'misc_args'};
if (scalar(@$misc_args)) {
my $flag = $misc_args->[0];
my $equivalent_cmdname = $set_flag_command_equivalent{$flag};
if (defined($equivalent_cmdname)) {
my $value;
if ($cmdname eq 'set') {
$value = 'on';
} else {
$value = 'off';
}
return ($equivalent_cmdname, $value);
}
}
}
# returning a cmdname avoid having the argument checked, and
# having an undef value means that the value will be ignored.
# So this return value leads to the command being ignored.
return ($cmdname, undef);
}
return (undef, undef);
}
# ALTIMP C/main/customization_options.c
# REMARK documentencoding handling is not reverted by resetting a value with
# set_conf, as the encodings are set using other sources of information
# (possibly based on @documentencoding) in converter.
sub set_informative_command_value($$) {
my ($self, $element) = @_;
my ($cmdname, $value) = informative_command_value($element);
if (defined($value)) {
my $set = $self->set_conf($cmdname, $value);
return $set;
}
return 0;
}
sub _in_preamble($) {
my $element = shift;
my $current_element = $element;
while (exists($current_element->{'parent'})) {
if (exists($current_element->{'parent'}->{'type'})
and $current_element->{'parent'}->{'type'} eq 'preamble_before_content') {
return 1;
}
$current_element = $current_element->{'parent'};
}
return 0;
}
# ALTIMP C/main/utils.c
# $COMMAND_LOCATION is 'last', 'preamble' or 'preamble_or_first'
# 'preamble' means setting sequentially to the values in the preamble.
# 'preamble_or_first' means setting to the first value for the command
# in the document if the first command is not in the preamble, else set
# sequentially to the values in the preamble.
# 'last' means setting to the last value for the command in the document.
#
# For unique command, the last may be considered to be the same as the first.
sub get_global_document_command($$$) {
my ($global_commands_information, $global_command, $command_location) = @_;
if ($command_location ne 'last' and $command_location ne 'preamble_or_first'
and $command_location ne 'preamble') {
warn "BUG: get_global_document_command: unknown command_location: $command_location";
}
my $element;
if (defined($global_commands_information)
and $global_commands_information->{$global_command}) {
if (ref($global_commands_information->{$global_command}) eq 'ARRAY') {
if ($command_location eq 'last') {
$element = $global_commands_information->{$global_command}->[-1];
} else {
if ($command_location eq 'preamble_or_first'
and not _in_preamble($global_commands_information->{$global_command}->[0])) {
$element =
$global_commands_information->{$global_command}->[0];
} else {
foreach my $command_element (@{$global_commands_information->{$global_command}}) {
if (_in_preamble($command_element)) {
$element = $command_element;
} else {
last;
}
}
}
}
} else {
# unique command, first, preamble and last are the same
$element = $global_commands_information->{$global_command};
}
}
return $element;
}
# ALTIMP C/main/customization_options.c
# Notice that the only effect is to use set_conf (directly or through
# set_informative_command_value), no @-commands setting side effects are done
# and associated customization variables are not set/reset either.
sub set_global_document_command($$$$) {
my ($self, $global_commands_information, $global_command,
$command_location) = @_;
my $element = get_global_document_command($global_commands_information,
$global_command, $command_location);
if ($element) {
set_informative_command_value($self, $element);
}
return $element;
}
# ALTIMP C/main/utils.c
sub lookup_index_entry($$) {
my ($index_entry_info, $indices_information) = @_;
if (!defined($index_entry_info)) {
cluck ("BUG: lookup_index_entry: index_entry_info undef\n");
}
my ($entry_index_name, $entry_number) = @{$index_entry_info};
my $index_info;
if ($indices_information->{$entry_index_name}) {
$index_info = $indices_information->{$entry_index_name};
if (exists($index_info->{'index_entries'})
and $index_info->{'index_entries'}->[$entry_number-1]) {
return ($index_info->{'index_entries'}->[$entry_number-1], $index_info);
}
}
return (undef, $index_info);
}
# only used from Perl
# Remove or replace first $REMOVED from $ARRAY
sub replace_remove_list_element($$;$) {
my ($array, $removed, $replacement) = @_;
#if (!defined($array)) {
# cluck();
#}
for (my $index = 0; $index < scalar(@$array); $index++) {
if ($array->[$index] eq $removed) {
if (!defined($replacement)) {
return splice(@$array, $index, 1);
} else {
return splice(@$array, $index, 1, $replacement);
}
}
}
return undef;
}
# ALTIMP C/main/document.c
sub set_output_encoding($$) {
my ($customization_information, $document) = @_;
my $document_information;
if (defined($document)) {
$document_information = $document->global_information();
}
if (!defined($customization_information->get_conf('OUTPUT_ENCODING_NAME'))
and defined($document_information)
and exists($document_information->{'input_encoding_name'})) {
$customization_information->set_conf('OUTPUT_ENCODING_NAME',
$document_information->{'input_encoding_name'})
}
}
# ALTIMP C/main/utils.c
# $DOCUMENT is the parsed Texinfo document. It is optional, but it
# is recommended to pass it to get the input encoding name.
# The input file encoding can be given as $INPUT_FILE_ENCODING optional
# argument, it will be used if $DOC_ENCODING_FOR_INPUT_FILE_NAME is
# undef or set.
sub input_file_name_encoding($$$;$$) {
my ($name_encoding, $doc_encoding_for_input_file_name, $locale_encoding,
$document, $input_file_encoding) = @_;
my $encoding;
if (defined($name_encoding)) {
$encoding = $name_encoding;
} elsif (!defined($doc_encoding_for_input_file_name)
or $doc_encoding_for_input_file_name) {
if (defined($input_file_encoding)) {
$encoding = $input_file_encoding;
} else {
my $document_info;
if ($document) {
$document_info = $document->global_information();
}
$encoding = $document_info->{'input_encoding_name'}
if (defined($document_info)
and exists($document_info->{'input_encoding_name'}));
}
} else {
$encoding = $locale_encoding;
}
return $encoding;
}
my $min_level = $command_structuring_level{'chapter'};
my $max_level = $command_structuring_level{'subsubsection'};
# ALTIMP C/main/utils.c
# Return numbered level of an element, as modified by raise/lowersections
sub section_level($) {
my $section = shift;
my $level = $command_structuring_level{$section->{'cmdname'}};
# correct level according to raise/lowersections
if (exists($section->{'extra'})
and $section->{'extra'}->{'level_modifier'}) {
$level -= $section->{'extra'}->{'level_modifier'};
if ($level < $min_level) {
if ($command_structuring_level{$section->{'cmdname'}} < $min_level) {
$level = $command_structuring_level{$section->{'cmdname'}};
} else {
$level = $min_level;
}
} elsif ($level > $max_level) {
$level = $max_level;
}
}
return $level;
}
# decompose a decimal number on a given base. It is not the
# decomposition used for counting as we start at 0, not 1 for all
# the factors. This is in order to get aa and not ba in calling
# code.
sub _decompose_integer($$) {
my ($number, $base) = @_;
my @result = ();
while ($number >= 0) {
my $factor = $number % $base;
push (@result, $factor);
$number = int(($number - $factor) / $base) - 1;
}
return @result;
}
# ALTIMP C/main/utils.c
sub enumerate_number_representation($$) {
my ($specification, $number) = @_;
if ($specification =~ /^[[:alpha:]]$/) {
my $result = '';
my $base_letter = ord('a');
$base_letter = ord('A') if (ucfirst($specification) eq $specification);
my @letter_ords
= _decompose_integer(ord($specification) - $base_letter + $number - 1, 26);
foreach my $ord (@letter_ords) {
$result = chr($base_letter + $ord) . $result;
}
return $result;
} elsif ($specification =~ /^[0-9]+$/) {
return $specification + $number -1;
}
return $number;
}
# ALTIMP C/main/utils.c
sub enumerate_item_representation($) {
my $element = shift;
my $number = $element->{'extra'}->{'item_number'};
my $enumerate = $element->{'parent'};
my $arguments_line = $enumerate->{'contents'}->[0];
my $block_line_arg = $arguments_line->{'contents'}->[0];
if (exists($block_line_arg->{'contents'})
and exists($block_line_arg->{'contents'}->[0]->{'text'})) {
return enumerate_number_representation(
$block_line_arg->{'contents'}->[0]->{'text'}, $number);
}
return $number;
}
# ALTIMP C/main/utils.c
sub is_content_empty($;$);
sub is_content_empty($;$) {
my ($tree, $do_not_ignore_index_entries) = @_;
if (!defined($tree) or !exists($tree->{'contents'})) {
return 1;
}
foreach my $content (@{$tree->{'contents'}}) {
if (exists($content->{'text'})) {
if ($content->{'text'} =~ /\S/) {
return 0;
} else {
next;
}
}
next if (exists($content->{'type'})
and $content->{'type'} eq 'arguments_line');
if (exists($content->{'cmdname'})) {
if (exists($content->{'type'})
and $content->{'type'} eq 'index_entry_command') {
if ($do_not_ignore_index_entries) {
return 0;
} else {
next;
}
}
if (exists($Texinfo::Commands::line_commands{$content->{'cmdname'}})) {
if (exists(
$Texinfo::Commands::formatted_line_commands{$content->{'cmdname'}})
or exists(
$Texinfo::Commands::formattable_line_commands{$content->{'cmdname'}})) {
return 0;
} else {
next;
}
} elsif (exists(
$Texinfo::Commands::nobrace_commands{$content->{'cmdname'}})) {
if (exists(
$Texinfo::Commands::formatted_nobrace_commands{$content->{'cmdname'}})) {
return 0;
} else {
next;
}
} elsif (exists(
$Texinfo::Commands::non_formatted_brace_commands{$content->{'cmdname'}})
or exists(
$Texinfo::Commands::non_formatted_block_commands{$content->{'cmdname'}})) {
next;
} else {
return 0;
}
}
if (exists($content->{'type'})) {
if ($content->{'type'} eq 'paragraph') {
return 0;
}
}
if (not is_content_empty($content, $do_not_ignore_index_entries)) {
return 0;
}
}
return 1;
}
# if in this container, we are 'inline', within a running text
my @inline_types = ('def_line', 'paragraph', 'preformatted',
'line_arg', 'block_line_arg', 'menu_entry_name', 'menu_entry_node');
my %inline_types;
foreach my $type (@inline_types) {
$inline_types{$type} = 1;
}
my %not_inline_commands;
foreach my $command (
keys(%Texinfo::Commands::root_commands),
keys(%Texinfo::Commands::block_commands),
grep {$Texinfo::Commands::brace_commands{$_} eq 'context'}
keys(%Texinfo::Commands::brace_commands)) {
$not_inline_commands{$command} = 1;
}
# Return 1 if inline in a running text, 0 if right in top-level or block
# environment, and undef otherwise.
# internal, not documented.
sub element_inline_or_block($) {
my $current = shift;
if (exists($current->{'type'}) and $inline_types{$current->{'type'}}) {
return 1;
} else {
my $cmdname = $current->{'cmdname'};
if (exists($current->{'cmdname'})
and exists($not_inline_commands{$current->{'cmdname'}})) {
return 0;
}
}
return undef;
}
# return true if in running text context.
# If $CHECK_CURRENT is set, check the element itself, too, in
# addition to the parent context.
sub element_is_inline($;$) {
my ($current, $check_current) = @_;
if ($check_current) {
my $inline_or_block = element_inline_or_block($current);
return ($inline_or_block) if (defined($inline_or_block));
}
while (exists($current->{'parent'})) {
$current = $current->{'parent'};
my $inline_or_block = element_inline_or_block($current);
return ($inline_or_block) if (defined($inline_or_block));
}
return 0;
}
sub normalize_top_node_name($) {
my $node_name = shift;
if ($node_name =~ /^top$/i) {
return 'Top';
}
return $node_name;
}
# Used in count_bytes
my $Encode_encoding_object;
my $last_encoding;
# Only used in the unmaintained IXIN converter, but could be useful in
# other converters, so it is better to keep it.
sub count_bytes($;$) {
my ($string, $encoding) = @_;
# TODO encoding is unlikely to be ascii, as documentencoding ascii
# is mapped to output encoding name us-ascii, also it may have been
# better to call Texinfo::Common::processing_output_encoding, in
# which case us-ascii would be mapped to iso-8859-1.
if (defined($encoding) and $encoding ne 'ascii') {
if (!defined($last_encoding) or $last_encoding ne $encoding) {
# Look up and save encoding object for next time. This is
# slightly faster than calling Encode::encode.
$last_encoding = $encoding;
$Encode_encoding_object = Encode::find_encoding($encoding);
if (!defined($Encode_encoding_object)) {
Carp::croak "Unknown encoding '$encoding'";
}
}
return length($Encode_encoding_object->encode($string));
} else {
return length($string);
}
}
# ALTIMP C/main/manipulate_indices.c
# if $PREFER_REFERENCE_ELEMENT is set, prefer an untranslated element.
# Seems to be used in converter only
sub index_content_element($;$) {
my ($element, $prefer_reference_element) = @_;
if (exists($element->{'extra'})
and exists($element->{'extra'}->{'def_command'})) {
if ($prefer_reference_element
and exists($element->{'extra'}->{'def_index_ref_element'})) {
return $element->{'extra'}->{'def_index_ref_element'};
} else {
return $element->{'extra'}->{'def_index_element'};
}
} else {
return $element->{'contents'}->[0];
}
}
# custom heading command line is split at @|
sub split_custom_heading_command_contents($) {
my $element = shift;
my $contents_nr = scalar(@{$element->{'contents'}});
if (!$contents_nr) {
return undef;
}
my $nr_split_contents = 0;
my $result = {'contents' => []};
my $heading_element = {'contents' => []};
push @{$result->{'contents'}}, $heading_element;
for (my $i = 0; $i < $contents_nr; $i++) {
my $current_content = $element->{'contents'}->[$i];
#print STDERR "$nr_split_contents : "
# .debug_print_element($current_content)."\n";
if (exists($current_content->{'cmdname'})
and $current_content->{'cmdname'} eq '|') {
if ($nr_split_contents < 2) {
$heading_element = {'contents' => []};
push @{$result->{'contents'}}, $heading_element;
$nr_split_contents++;
}
} else {
push @{$heading_element->{'contents'}}, $current_content;
}
}
return $result;
}
# not currently used
sub find_parent_root_command($$);
sub find_parent_root_command($$) {
my ($self, $current) = @_;
my $root_command;
while (1) {
if (exists($current->{'cmdname'})) {
if (exists($Texinfo::Commands::root_commands{$current->{'cmdname'}})) {
return $current;
} elsif (exists($Texinfo::Commands::block_commands{$current->{'cmdname'}})
and $Texinfo::Commands::block_commands{$current->{'cmdname'}} eq 'region') {
if ($current->{'cmdname'} eq 'copying' and defined($self)
and exists($self->{'document'})) {
my $global_commands = $self->{'document'}->global_commands_information();
if (defined($global_commands)
and exists($global_commands->{'insertcopying'})) {
foreach my $insertcopying(@{$global_commands->{'insertcopying'}}) {
my $root_command
= find_parent_root_command($self, $insertcopying);
return $root_command if (defined($root_command));
}
}
} else {
return undef;
}
}
}
if (exists($current->{'parent'})) {
$current = $current->{'parent'};
} else {
return undef;
}
}
# Should never get there
return undef;
}
# In the default case, global informative commands are collected
# by the parsers. The following functions allow to collect
# any @-command.
# Used in customization init files code and should be useful in
# particular in user-defined init files.
sub collect_commands_in_tree($$) {
my ($root, $commands_list) = @_;
my $commands_hash = {};
foreach my $command_name (@$commands_list) {
$commands_hash->{$command_name} = [];
}
_collect_commands_in_tree($root, $commands_hash);
return $commands_hash;
}
sub _collect_commands_in_tree($$);
sub _collect_commands_in_tree($$) {
my ($current, $commands_hash) = @_;
if (exists($current->{'cmdname'})
and defined($commands_hash->{$current->{'cmdname'}})) {
push @{$commands_hash->{$current->{'cmdname'}}}, $current;
}
if (exists($current->{'contents'})) {
foreach my $child (@{$current->{'contents'}}) {
_collect_commands_in_tree($child, $commands_hash);
}
}
}
sub collect_commands_list_in_tree($$) {
my ($root, $commands_list) = @_;
my $collected_commands_list = [];
my $commands_hash = {};
foreach my $command_name (@$commands_list) {
$commands_hash->{$command_name} = 1;
}
_collect_commands_list_in_tree($root, $commands_hash, $collected_commands_list);
return $collected_commands_list;
}
sub _collect_commands_list_in_tree($$$);
sub _collect_commands_list_in_tree($$$) {
my ($current, $commands_hash, $collected_commands_list) = @_;
if (exists($current->{'cmdname'})
and defined($commands_hash->{$current->{'cmdname'}})) {
push @{$collected_commands_list}, $current;
}
if (exists($current->{'contents'})) {
foreach my $child (@{$current->{'contents'}}) {
_collect_commands_list_in_tree($child, $commands_hash,
$collected_commands_list);
}
}
}
# Common to different module, but not meant to be used in user-defined
# codes.
# ALTIMP C/main/utils.c
sub get_label_element($) {
my $current = shift;
return undef if (!exists($current->{'cmdname'}));
if ($current->{'cmdname'} eq 'node') {
# first content of arguments_line type element
return $current->{'contents'}->[0]->{'contents'}->[0];
} elsif (($current->{'cmdname'} eq 'anchor'
or $current->{'cmdname'} eq 'namedanchor')
and exists($current->{'contents'})) {
return $current->{'contents'}->[0];
} elsif ($current->{'cmdname'} eq 'float'
and scalar(@{$current->{'contents'}->[0]->{'contents'}}) >= 2) {
# second content of arguments_line, ie second argument on line
return $current->{'contents'}->[0]->{'contents'}->[1];
}
return undef;
}
# For code that can be called both from the main
# context and from a converter. With either a converter, and, if
# not given, a document.
# NOTE it is considered internal and should not be called in user-defined
# code. If this changes, should be documented.
sub converter_or_document_line_warn($$$$;$) {
my ($document, $converter, $text, $error_location_info, $continuation) = @_;
if (defined($converter)) {
$converter->converter_line_warn($text, $error_location_info,
$continuation);
} else {
$document->document_line_warn($text, $error_location_info,
$continuation);
}
}
# functions used for debugging. May be used in other modules.
# Not documented.
sub _parent_string($) {
my $current = shift;
my $parent_string;
if (exists($current->{'parent'})) {
my $parent = $current->{'parent'};
my $parent_cmd = '';
my $parent_type = '';
$parent_cmd = "\@$parent->{'cmdname'}" if (exists($parent->{'cmdname'}));
$parent_type = "($parent->{'type'})" if (exists($parent->{'type'}));
$parent_string = " <- $parent_cmd$parent_type";
}
return $parent_string
}
sub debug_command_name($) {
my $cmdname = shift;
if ($cmdname eq "\n") {
return '\n';
} elsif ($cmdname eq "\t") {
return '\t';
} else {
return $cmdname;
}
}
# informations on a tree element, short version
sub debug_print_element($;$) {
my ($current, $print_parent) = @_;
if (!defined($current)) {
return "debug_print_element: UNDEF";
}
my $warning = '';
if (ref($current) ne 'Texinfo::TreeElement') {
if (ref($current) eq 'HASH') {
$warning = '[ERROR:HASH]';
} else {
return "debug_print_element: $current not a tree element";
}
}
my $type = '';
my $cmd = '';
my $text = '';
my $args = '';
my $contents = '';
$type = "($current->{'type'})" if (exists($current->{'type'}));
if (exists($current->{'text'})) {
if ($current->{'text'} eq '') {
$text = "[T]";
} else {
my $text_str = $current->{'text'};
$text_str =~ s/\n/\\n/g;
$text = "[T: $text_str]";
}
} else {
if (exists($current->{'cmdname'})) {
$cmd = '@' . debug_command_name($current->{'cmdname'});
}
$contents = "[C".scalar(@{$current->{'contents'}}).']'
if $current->{'contents'};
}
my $parent_string = '';
if ($print_parent) {
$parent_string = _parent_string($current);
$parent_string = '' if (!defined($parent_string));
}
return "$warning$cmd$type$text$args$contents$parent_string";
}
# for debugging
sub debug_print_element_details($;$) {
my ($current, $print_parent) = @_;
my $string = debug_print_element($current, $print_parent);
foreach my $key (keys (%$current)) {
$string .= " $key: $current->{$key}\n";
}
if (defined($current->{'extra'})) {
$string .= " EXTRA\n";
foreach my $key (keys (%{$current->{'extra'}})) {
$string .= " $key: $current->{'extra'}->{$key}\n";
}
}
if (exists($current->{'info'})) {
$string .= " INFO\n";
foreach my $key (keys (%{$current->{'info'}})) {
$string .= " $key: $current->{'info'}->{$key}\n";
}
}
return $string;
}
sub debug_print_output_unit($) {
my $current = shift;
if (!defined($current)) {
return "debug_print_output_unit: UNDEF\n";
}
if (ref($current) ne 'HASH') {
return "debug_print_output_unit: $current not a hash\n";
}
my $type = $current->{'unit_type'};
# Should never happen
$type = 'UNDEF' if (!defined($type));
my $node_unit_cmd = '';
if (exists($current->{'unit_node'})) {
my $node_element = $current->{'unit_node'}->{'element'};
$node_unit_cmd = '{N';
if ($node_element eq $current->{'unit_command'}) {
$node_unit_cmd .= '*';
}
$node_unit_cmd .= ':' . debug_print_element($node_element, 0)
.Texinfo::Convert::Texinfo::root_heading_command_to_texinfo(
$node_element).'}';
}
my $section_unit_cmd = '';
if (exists($current->{'unit_section'})) {
my $section_element = $current->{'unit_section'}->{'element'};
$section_unit_cmd = '{S';
if ($section_element eq $current->{'unit_command'}) {
$section_unit_cmd .= '*';
}
$section_unit_cmd .= ':'.debug_print_element($section_element, 0)
.Texinfo::Convert::Texinfo::root_heading_command_to_texinfo(
$section_element).'}';
}
my $fname = '';
if (exists($current->{'unit_filename'})) {
$fname = "{F:$current->{'unit_filename'}}";
}
my $variety = '';
if (exists($current->{'special_unit_variety'})) {
$variety = "{V:$current->{'special_unit_variety'}}";
}
my $contents = '';
$contents = "{C".scalar(@{$current->{'unit_contents'}}).'}'
if (exists($current->{'unit_contents'}));
return "{$type}$fname$variety$contents$node_unit_cmd$section_unit_cmd";
}
# format list for debugging messages
sub debug_list {
my $label = shift;
my (@list) = (ref $_[0] && $_[0] =~ /.*ARRAY.*/) ? @{$_[0]} : @_;
my $str = "$label: [";
my @items = ();
for my $item (@list) {
$item = "" if ! defined ($item);
$item =~ s/\n/\\n/g;
push (@items, $item);
}
$str .= join (" ", @items);
$str .= "]";
warn "$str\n";
}
# format hash for debugging messages
sub debug_hash {
my ($label) = shift;
my (%hash) = (ref $_[0] && $_[0] =~ /.*HASH.*/) ? %{$_[0]} : @_;
my $str = "$label: {";
my @items = ();
for my $key (sort keys %hash) {
my $val = $hash{$key} || ""; # no undef
$key =~ s/\n/\\n/g;
$val =~ s/\n/\\n/g;
push (@items, "$key:$val");
}
$str .= join (",", @items);
$str .= "}";
warn "$str\n";
}
# for debugging
use Data::Dumper;
my @kept_keys = ('contents', 'cmdname', 'type', 'text',
'extra', 'info', 'spaces_before_argument',
'spaces_after_argument', 'comment_at_end', 'index_entry'
);
my %kept_keys;
foreach my $key (@kept_keys) {
$kept_keys{$key} = 1;
}
my @kept_keys_output_unit = ('unit_contents');
foreach my $key (@kept_keys_output_unit) {
$kept_keys{$key} = 1;
}
my @kept_keys_handle = ('tree_document_descriptor');
foreach my $key (@kept_keys_handle) {
$kept_keys{$key} = 1;
}
sub _filter_print_keys { [grep {$kept_keys{$_}} ( sort keys %{$_[0]} )] };
sub debug_print_tree($) {
my $tree = shift;
local $Data::Dumper::Sortkeys = \&_filter_print_keys;
local $Data::Dumper::Purity = 1;
local $Data::Dumper::Indent = 1;
my $result = Data::Dumper->Dump([$tree]);
# Tree elements are blessed into the 'Texinfo::TreeElement' class, but
# this information is useless clutter for someone reading it.
$result =~ s/bless\( //g;
$result =~ s/ 'Texinfo::TreeElement' \)//g;
return $result;
}
# Export debug_print_tree with a short name as a convenience for
# working on the program. With this, you can inspect part of the
# parse tree with "warn print_tree($root);" rather than
# "warn Texinfo::Common::debug_print_tree($root);", which is more typing.
*print_tree = \&debug_print_tree;
push @EXPORT, 'print_tree';
1;
__END__
=head1 NAME
Texinfo::Common - Texinfo modules common data and miscellaneous methods
=head1 SYNOPSIS
use Texinfo::Common;
my @commands_to_collect = ('math');
my $collected_commands
= Texinfo::Common::collect_commands_in_tree($document_root,
\@commands_to_collect);
my $package_version
= Texinfo::Common::get_build_constant('PACKAGE_AND_VERSION');
=head1 NOTES
The Texinfo Perl module main purpose is to be used in C to convert
Texinfo to other formats. There is no promise of API stability.
=head1 DESCRIPTION
Texinfo::Common holds hashes with miscellaneous information and some
hashes with information on Texinfo @-commands, as well as miscellaneous
methods.
=head1 MISC INFORMATION
Values defined for a Texinfo build independently of any document or
output format are available by calling C:
=over
=item $value = get_build_constant($name)
The following build constants are available:
=over
=item PACKAGE
=item PACKAGE_CONFIG
=item PACKAGE_AND_VERSION
=item PACKAGE_AND_VERSION_CONFIG
=item PACKAGE_NAME
=item PACKAGE_NAME_CONFIG
=item PACKAGE_VERSION
=item PACKAGE_VERSION_CONFIG
=item PACKAGE_URL
=item PACKAGE_URL_CONFIG
Texinfo package name and versions. Values of build constants without
C<_CONFIG> appended are set by configure. For each variable set by
configure there is another one with C<_CONFIG> appended
to the name set to the same value, to match the name of the macros set in
C. So, for example C value is the same as
C, set to the C value set by configure.
=back
=back
Hashes are defined as C variables, and are therefore available
outside of the module.
=over
=item %document_settable_at_commands
Keys are customization options corresponding to @-commands. For example
C or C.
=item %null_device_file
Keys are null devices names, such as C or C.
=item %texinfo_output_formats
X>
Cannonical output formats that have associated conditionals. In
practice corresponds to C C<%block_commands> plus C
and C.
=back
=head1 @-COMMAND INFORMATION
Hashes are defined as C variables, and are therefore available
outside of the module.
The key of the hashes are @-command names without the @. The
following hashes are available:
=over
=item %all_commands
X>
All the @-commands.
=item %def_aliases
=item %def_no_var_arg_commands
X>
X>
C<%def_aliases> associates an aliased command to the original command, for
example C is associated to C.
C<%def_no_var_arg_commands> associates a definition command name with
a true value if the I on the definition command line can contain
non-metasyntactic variables. For instance, it is true for C
but false for C, since C<@defun> I is supposed to contain
metasyntactic variables only.
=item %small_block_associated_command
X>
Associate small command like C to the regular command
C.
=back
=head1 METHODS
Two methods are exported in the default case for Texinfo modules messages
translation in the Uniforum gettext framework, C<__> and C<__p>.
The Texinfo tree and Texinfo tree elements used in argument of some functions
are documented in L. When customization
information is needed, an object that defines C and/or C is
expected, for example a converter inheriting from
C, see L.
=over
=item $translated_string = __($msgid)
=item $translated_string = __p($msgctxt, $msgid)
Returns the I<$msgid> string translated in the Texinfo messages text domain.
C<__p> can be used instead of C<__> to pass a I<$msgctxt> context string to
provide translators with information on the string context when the string is
short or if the translation could depend on the context. C<__> corresponds to
the C function and C<__p> to the C function.
It is not advised to use those functions in user-defined code. It is not
practical either, as the translatable strings marked by C<__> or C<__p> need to
be collected and added to the Texinfo messages domain. This facility could
only be used in user-defined code with translatable strings already present in
the domain anyway. In fact, these functions are documented mainly because they
are automatically exported.
See L,
L C interface|https://www.gnu.org/software/gettext/manual/html_node/gettext.html>,
L.
For translation of strings in output, see L.
=item collect_commands_in_tree($tree, $commands_list)
X>
Returns a hash reference with keys @-commands names specified
in the I<$commands_list> array reference and values arrays of
tree elements corresponding to those @-command found in I<$tree>
by traversing the tree.
=item collect_commands_list_in_tree($tree, $commands_list)
X>
Return a list reference containing the tree elements corresponding
to the @-commands names specified in the I<$commands_list> found
in I<$tree> by traversing the tree. The order of the @-commands
should be kept.
=item $encoding_name = associated_processing_encoding($element)
X>
Returns the encoding name that can be used for decoding derived
from the encoding that was set where I<$element> appeared.
=item $result = element_is_inline($element, $check_current)
X>
Return true if the element passed in argument is in running text
context. If the optional I<$check_current> argument is set,
check the element itself, in addition to the parent context.
=item ($encoded_file_name, $encoding) = encode_file_name($file_name, $input_encoding)
Encode the I<$file_name> text string to a binary string I<$encoded_file_name>
based on I<$input_encoding>. Also returns the I<$encoding> name actually
used which may have undergone some normalization. This function is mostly
a wrapper around L which avoids calling the module if not
needed. Do nothing if I<$input_encoding> is C.
=item $text = enumerate_item_representation($specification, $number)
X>
This function returns the number or letter correponding to item
number I<$number> for an C<@enumerate> specification I<$specification>,
appearing on an C<@enumerate> line. For example
enumerate_item_representation('c', 3)
is C.
=item ($contents_element, \@accent_commands) = find_innermost_accent_contents($element)
X>
I<$element> should be an accent command Texinfo tree element. Returns
an element containing the innermost accent @-command contents,
normally a text element with one or two letter, and an array reference
containing the accent commands nested in I<$element> (including
I<$element>). If there is no argument at all for the accent command,
I<$contents_element> is C.
=item $command = find_parent_root_command($object, $tree_element)
X>
Find the parent root command (sectioning command or node) of a tree element.
The I<$object> argument is optional, its C field is used
to continue through C<@insertcopying> if in a C<@copying>.
=item $entry_content_element = index_content_element($element, $prefer_reference_element)
Return a Texinfo tree element corresponding to the content of the index
entry associated to I<$element>. If I<$prefer_reference_element> is set,
prefer an untranslated element. If the element is an index command like
C<@cindex> or an C<@ftable> C<@item>, the content element is the argument
of the command. If the element is a definition line, the index entry
element is based on the name and class.
=item $encoding = input_file_name_encoding($name_encoding, $doc_encoding_for_input_file_name, $locale_encoding, $document, $input_file_encoding)
X>
Returns the encoding assumed for input file names.
I<$name_encoding> is used as encoding name if defined. If
I<$doc_encoding_for_input_file_name> is set, the encoding is based on the input
file content encoding, otherwise I<$locale_encoding> is used. The I<$document>
argument is an optional Texinfo parsed document used to get the input document
content encoding. The I<$input_file_encoding> argument is optional, it will be
used in priority if the the encoding is based on the input file encoding. If
I<$input_file_encoding> is not given, it is recommended to pass I<$document>,
otherwise there is no way to determine the input document content encoding.
=item $result = is_content_empty($tree, $do_not_ignore_index_entries)
X>
Return true if the I<$tree> has content that could be formatted.
I<$do_not_ignore_index_entries> is optional. If set, index entries
are considered to be formatted.
=item $file = locate_include_file($customization_information, $file_path)
X>
Locate I<$file_path>. If I<$file_path> is an absolute path or has C<.>
or C<..> in the path directories it is checked that the path exists and is a
file. Otherwise, the file name in I<$file_path> is located in include
directories also used to find texinfo files included in Texinfo documents.
I<$file_path> should be a binary string. C is returned if the file was
not found, otherwise the file found is returned as a binary string.
=item ($index_entry, $index_info) = lookup_index_entry($index_entry_info, $indices_information)
Returns an I<$index_entry> hash based on the I<$index_entry_info> and
I<$indices_information>. Also returns the I<$index_info> hash with information on
the index associated to the index entry. I<$index_entry_info> should be
an array reference with an index name as first element and the index entry number
in that index (1-based) as second element. In general, the I<$index_entry_info>
is an L I|Texinfo::Parser/index_entry> associated to an element.
The I<$index_entry> hash is described in L. The
I<$index_info> hash is described in L<< C|Texinfo::Document/$indices_information = $document->indices_information() >>.
=item $normalized_name = normalize_top_node_name($node_string)
X>
Normalize the node name string given in argument, by normalizing
Top node case.
=item $result = replace_remove_list_element($array, $element, $replacement)
Remove first occurence of I<$element> in the array reference I<$array>.
If the optional I<$replacement> argument is given, replace the I<$element>
by I<$replacement>. Return the removed element, or C if not found.
=item $level = section_level($section)
X>
Return numbered level of the tree sectioning element I<$section>, as modified by
raise/lowersections.
=item $element = set_global_document_command($customization_information, $global_commands_information, $cmdname, $command_location)
X>
Set the Texinfo customization variable corresponding to I<$cmdname> in
I<$customization_information>. The I<$global_commands_information> should
contain information about global commands in a Texinfo document, typically obtained
from a parsed document L<< $document->global_commands_information()|Texinfo::Document/$commands = global_commands_information($document) >>.
I<$command_location> specifies where in the document the value should be taken from,
for commands that may appear more than once. The possibilities are:
=over
=item last
Set to the last value for the command.
=item preamble
Set sequentially to the values in the Texinfo preamble.
=item preamble_or_first
Set to the first value of the command if the first command is not
in the Texinfo preamble, else set as with I,
sequentially to the values in the Texinfo preamble.
=back
The I<$element> returned is the last element that was used to set the
customization value, or C if no customization value was found.
Notice that the only effect of this function is to set a customization
variable value, no @-command side effects are run, no associated customization
variables are set.
=item $status = set_informative_command_value($customization_information, $element)
X>
Set the Texinfo customization option corresponding to the tree element
I<$element>. The command associated to the tree element should be
a command that sets some information, such as C<@documentlanguage>,
C<@contents> or C<@footnotestyle> for example. Return true if the command
argument was found and the customization variable was set.
=item set_output_encoding($customization_information, $document)
X>
If not already set, set C based on input file
encoding.
=item $split_contents = split_custom_heading_command_contents($element)
X>
Split the I<$element> contents at C<@|> in at max three parts.
Return an element containing the split parts in contents, or C if
the I<$element> has no useful content. The input I<$element>
is supposed to be the argument line element of
C<%Texinfo::Commands::heading_spec_commands> commands such as C<@everyheading>.
=item $status = valid_customization_option($name)
X>
Return true if the I<$name> is a known customization option.
=item $status = valid_tree_transformation($name)
X>
Return true if the I<$name> is a known tree transformation name
that may be passed with C to modify a Texinfo
tree.
=back
=head1 SEE ALSO
L, L and L.
=head1 AUTHOR
Patrice Dumas, Ebug-texinfo@gnu.orgE
=head1 COPYRIGHT AND LICENSE
Copyright 2010- Free Software Foundation, Inc. See the source file for
all copyright years.
This library is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at
your option) any later version.
=cut