# i18n.py
#
# Copyright (C) 2012-2016 Red Hat, Inc.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions of
# the GNU General Public License v.2, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY expressed or implied, including the implied warranties of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details. You should have received a copy of the
# GNU General Public License along with this program; if not, write to the
# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA. Any Red Hat trademarks that are incorporated in the
# source code or documentation are not subject to the GNU General Public
# License and may only be used or replicated with the express permission of
# Red Hat, Inc.
#
from __future__ import print_function
from __future__ import unicode_literals
from dnf.pycomp import unicode
import dnf
import locale
import os
import signal
import sys
import unicodedata
"""
Centralize i18n stuff here. Must be unittested.
"""
class UnicodeStream(object):
def __init__(self, stream, encoding):
self.stream = stream
self.encoding = encoding
def write(self, s):
if not isinstance(s, str):
s = (s.decode(self.encoding, 'replace') if dnf.pycomp.PY3 else
s.encode(self.encoding, 'replace'))
try:
self.stream.write(s)
except UnicodeEncodeError:
s_bytes = s.encode(self.stream.encoding, 'backslashreplace')
if hasattr(self.stream, 'buffer'):
self.stream.buffer.write(s_bytes)
else:
s = s_bytes.decode(self.stream.encoding, 'ignore')
self.stream.write(s)
def __getattr__(self, name):
return getattr(self.stream, name)
def _full_ucd_support(encoding):
"""Return true if encoding can express any Unicode character.
Even if an encoding can express all accented letters in the given language,
we can't generally settle for it in DNF since sometimes we output special
characters like the registered trademark symbol (U+00AE) and surprisingly
many national non-unicode encodings, including e.g. ASCII and ISO-8859-2,
don't contain it.
"""
if encoding is None:
return False
lower = encoding.lower()
if lower.startswith('utf-') or lower.startswith('utf_'):
return True
return False
def _guess_encoding():
""" Take the best shot at the current system's string encoding. """
encoding = locale.getpreferredencoding(False)
return 'utf-8' if encoding.startswith("ANSI") else encoding
def setup_locale():
try:
dnf.pycomp.setlocale(locale.LC_ALL, '')
except locale.Error:
# default to C.UTF-8 or C locale if we got a failure.
try:
dnf.pycomp.setlocale(locale.LC_ALL, 'C.UTF-8')
os.environ['LC_ALL'] = 'C.UTF-8'
except locale.Error:
dnf.pycomp.setlocale(locale.LC_ALL, 'C')
os.environ['LC_ALL'] = 'C'
print('Failed to set locale, defaulting to {}'.format(os.environ['LC_ALL']),
file=sys.stderr)
def setup_stdout():
""" Check that stdout is of suitable encoding and handle the situation if
not.
Returns True if stdout was of suitable encoding already and no changes
were needed.
"""
stdout = sys.stdout
if not stdout.isatty():
signal.signal(signal.SIGPIPE, signal.SIG_DFL)
try:
encoding = stdout.encoding
except AttributeError:
encoding = None
if not _full_ucd_support(encoding):
sys.stdout = UnicodeStream(stdout, _guess_encoding())
return False
return True
def ucd_input(ucstring):
# :api, deprecated in 2.0.0, will be erased when python2 is abandoned
""" It uses print instead of passing the prompt to raw_input.
raw_input doesn't encode the passed string and the output
goes into stderr
"""
print(ucstring, end='')
return dnf.pycomp.raw_input()
def ucd(obj):
# :api, deprecated in 2.0.0, will be erased when python2 is abandoned
""" Like the builtin unicode() but tries to use a reasonable encoding. """
if dnf.pycomp.PY3:
if dnf.pycomp.is_py3bytes(obj):
return str(obj, _guess_encoding(), errors='ignore')
elif isinstance(obj, str):
return obj
return str(obj)
else:
if isinstance(obj, dnf.pycomp.unicode):
return obj
if hasattr(obj, '__unicode__'):
# see the doc for the unicode() built-in. The logic here is: if obj
# implements __unicode__, let it take a crack at it, but handle the
# situation if it fails:
try:
return dnf.pycomp.unicode(obj)
except UnicodeError:
pass
return dnf.pycomp.unicode(str(obj), _guess_encoding(), errors='ignore')
# functions for formatting output according to terminal width,
# They should be used instead of build-in functions to count on different
# widths of Unicode characters
def _exact_width_char(uchar):
return 2 if unicodedata.east_asian_width(uchar) in ('W', 'F') else 1
def chop_str(msg, chop=None):
""" Return the textual width of a Unicode string, chopping it to
a specified value. This is what you want to use instead of %.*s, as it
does the "right" thing with regard to different Unicode character width
Eg. "%.*s" % (10, msg) <= becomes => "%s" % (chop_str(msg, 10)) """
if chop is None:
return exact_width(msg), msg
width = 0
chopped_msg = ""
for char in msg:
char_width = _exact_width_char(char)
if width + char_width > chop:
break
chopped_msg += char
width += char_width
return width, chopped_msg
def exact_width(msg):
""" Calculates width of char at terminal screen
(Asian char counts for two) """
return sum(_exact_width_char(c) for c in msg)
def fill_exact_width(msg, fill, chop=None, left=True, prefix='', suffix=''):
""" Expand a msg to a specified "width" or chop to same.
Expansion can be left or right. This is what you want to use instead of
%*.*s, as it does the "right" thing with regard to different Unicode
character width.
prefix and suffix should be used for "invisible" bytes, like
highlighting.
Examples:
``"%-*.*s" % (10, 20, msg)`` becomes
``"%s" % (fill_exact_width(msg, 10, 20))``.
``"%20.10s" % (msg)`` becomes
``"%s" % (fill_exact_width(msg, 20, 10, left=False))``.
``"%s%.10s%s" % (pre, msg, suf)`` becomes
``"%s" % (fill_exact_width(msg, 0, 10, prefix=pre, suffix=suf))``.
"""
width, msg = chop_str(msg, chop)
if width >= fill:
if prefix or suffix:
msg = ''.join([prefix, msg, suffix])
else:
extra = " " * (fill - width)
if left:
msg = ''.join([prefix, msg, suffix, extra])
else:
msg = ''.join([extra, prefix, msg, suffix])
return msg
def textwrap_fill(text, width=70, initial_indent='', subsequent_indent=''):
""" Works like we want textwrap.wrap() to work, uses Unicode strings
and doesn't screw up lists/blocks/etc. """
def _indent_at_beg(line):
count = 0
byte = 'X'
for byte in line:
if byte != ' ':
break
count += 1
if byte not in ("-", "*", ".", "o", '\xe2'):
return count, 0
list_chr = chop_str(line[count:], 1)[1]
if list_chr in ("-", "*", ".", "o",
"\u2022", "\u2023", "\u2218"):
nxt = _indent_at_beg(line[count+len(list_chr):])
nxt = nxt[1] or nxt[0]
if nxt:
return count, count + 1 + nxt
return count, 0
text = text.rstrip('\n')
lines = text.replace('\t', ' ' * 8).split('\n')
ret = []
indent = initial_indent
wrap_last = False
csab = 0
cspc_indent = 0
for line in lines:
line = line.rstrip(' ')
(lsab, lspc_indent) = (csab, cspc_indent)
(csab, cspc_indent) = _indent_at_beg(line)
force_nl = False # We want to stop wrapping under "certain" conditions:
if wrap_last and cspc_indent: # if line starts a list or
force_nl = True
if wrap_last and csab == len(line): # is empty line
force_nl = True
# if line doesn't continue a list and is "block indented"
if wrap_last and not lspc_indent:
if csab >= 4 and csab != lsab:
force_nl = True
if force_nl:
ret.append(indent.rstrip(' '))
indent = subsequent_indent
wrap_last = False
if csab == len(line): # empty line, remove spaces to make it easier.
line = ''
if wrap_last:
line = line.lstrip(' ')
cspc_indent = lspc_indent
if exact_width(indent + line) <= width:
wrap_last = False
ret.append(indent + line)
indent = subsequent_indent
continue
wrap_last = True
words = line.split(' ')
line = indent
spcs = cspc_indent
if not spcs and csab >= 4:
spcs = csab
for word in words:
if (width < exact_width(line + word)) and \
(exact_width(line) > exact_width(subsequent_indent)):
ret.append(line.rstrip(' '))
line = subsequent_indent + ' ' * spcs
line += word
line += ' '
indent = line.rstrip(' ') + ' '
if wrap_last:
ret.append(indent.rstrip(' '))
return '\n'.join(ret)
def select_short_long(width, msg_short, msg_long):
""" Automatically selects the short (abbreviated) or long (full) message
depending on whether we have enough screen space to display the full
message or not. If a caller by mistake passes a long string as
msg_short and a short string as a msg_long this function recognizes
the mistake and swaps the arguments. This function is especially useful
in the i18n context when you cannot predict how long are the translated
messages.
Limitations:
1. If msg_short is longer than width you will still get an overflow.
This function does not abbreviate the string.
2. You are not obliged to provide an actually abbreviated string, it is
perfectly correct to pass the same string twice if you don't want
any abbreviation. However, if you provide two different strings but
having the same width this function is unable to recognize which one
is correct and you should assume that it is unpredictable which one
is returned.
Example:
``select_short_long (10, _("Repo"), _("Repository"))``
will return "Repository" in English but the results in other languages
may be different. """
width_short = exact_width(msg_short)
width_long = exact_width(msg_long)
# If we have two strings of the same width:
if width_short == width_long:
return msg_long
# If the short string is wider than the long string:
elif width_short > width_long:
return msg_short if width_short <= width else msg_long
# The regular case:
else:
return msg_long if width_long <= width else msg_short
def translation(name):
# :api, deprecated in 2.0.0, will be erased when python2 is abandoned
""" Easy gettext translations setup based on given domain name """
setup_locale()
def ucd_wrapper(fnc):
return lambda *w: ucd(fnc(*w))
t = dnf.pycomp.gettext.translation(name, fallback=True)
return map(ucd_wrapper, dnf.pycomp.gettext_setup(t))
def pgettext(context, message):
result = _(context + chr(4) + message)
if "\004" in result:
return message
else:
return result
# setup translations
_, P_ = translation("dnf")
C_ = pgettext