# -*- coding: utf-8 -*-
"""
babel.util
~~~~~~~~~~
Various utility classes and functions.
:copyright: (c) 2013 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""
import codecs
from datetime import timedelta, tzinfo
import os
import re
import textwrap
from babel._compat import izip, imap
import pytz as _pytz
from babel import localtime
missing = object()
def distinct(iterable):
"""Yield all items in an iterable collection that are distinct.
Unlike when using sets for a similar effect, the original ordering of the
items in the collection is preserved by this function.
>>> print(list(distinct([1, 2, 1, 3, 4, 4])))
[1, 2, 3, 4]
>>> print(list(distinct('foobar')))
['f', 'o', 'b', 'a', 'r']
:param iterable: the iterable collection providing the data
"""
seen = set()
for item in iter(iterable):
if item not in seen:
yield item
seen.add(item)
# Regexp to match python magic encoding line
PYTHON_MAGIC_COMMENT_re = re.compile(
br'[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)', re.VERBOSE)
def parse_encoding(fp):
"""Deduce the encoding of a source file from magic comment.
It does this in the same way as the `Python interpreter`__
.. __: https://docs.python.org/3.4/reference/lexical_analysis.html#encoding-declarations
The ``fp`` argument should be a seekable file object.
(From Jeff Dairiki)
"""
pos = fp.tell()
fp.seek(0)
try:
line1 = fp.readline()
has_bom = line1.startswith(codecs.BOM_UTF8)
if has_bom:
line1 = line1[len(codecs.BOM_UTF8):]
m = PYTHON_MAGIC_COMMENT_re.match(line1)
if not m:
try:
import parser
parser.suite(line1.decode('latin-1'))
except (ImportError, SyntaxError, UnicodeEncodeError):
# Either it's a real syntax error, in which case the source is
# not valid python source, or line2 is a continuation of line1,
# in which case we don't want to scan line2 for a magic
# comment.
pass
else:
line2 = fp.readline()
m = PYTHON_MAGIC_COMMENT_re.match(line2)
if has_bom:
if m:
magic_comment_encoding = m.group(1).decode('latin-1')
if magic_comment_encoding != 'utf-8':
raise SyntaxError(
'encoding problem: {0} with BOM'.format(
magic_comment_encoding))
return 'utf-8'
elif m:
return m.group(1).decode('latin-1')
else:
return None
finally:
fp.seek(pos)
PYTHON_FUTURE_IMPORT_re = re.compile(
r'from\s+__future__\s+import\s+\(*(.+)\)*')
def parse_future_flags(fp, encoding='latin-1'):
"""Parse the compiler flags by :mod:`__future__` from the given Python
code.
"""
import __future__
pos = fp.tell()
fp.seek(0)
flags = 0
try:
body = fp.read().decode(encoding)
# Fix up the source to be (hopefully) parsable by regexpen.
# This will likely do untoward things if the source code itself is broken.
# (1) Fix `import (\n...` to be `import (...`.
body = re.sub(r'import\s*\([\r\n]+', 'import (', body)
# (2) Join line-ending commas with the next line.
body = re.sub(r',\s*[\r\n]+', ', ', body)
# (3) Remove backslash line continuations.
body = re.sub(r'\\\s*[\r\n]+', ' ', body)
for m in PYTHON_FUTURE_IMPORT_re.finditer(body):
names = [x.strip().strip('()') for x in m.group(1).split(',')]
for name in names:
feature = getattr(__future__, name, None)
if feature:
flags |= feature.compiler_flag
finally:
fp.seek(pos)
return flags
def pathmatch(pattern, filename):
"""Extended pathname pattern matching.
This function is similar to what is provided by the ``fnmatch`` module in
the Python standard library, but:
* can match complete (relative or absolute) path names, and not just file
names, and
* also supports a convenience pattern ("**") to match files at any
directory level.
Examples:
>>> pathmatch('**.py', 'bar.py')
True
>>> pathmatch('**.py', 'foo/bar/baz.py')
True
>>> pathmatch('**.py', 'templates/index.html')
False
>>> pathmatch('**/templates/*.html', 'templates/index.html')
True
>>> pathmatch('**/templates/*.html', 'templates/foo/bar.html')
False
:param pattern: the glob pattern
:param filename: the path name of the file to match against
"""
symbols = {
'?': '[^/]',
'?/': '[^/]/',
'*': '[^/]+',
'*/': '[^/]+/',
'**/': '(?:.+/)*?',
'**': '(?:.+/)*?[^/]+',
}
buf = []
for idx, part in enumerate(re.split('([?*]+/?)', pattern)):
if idx % 2:
buf.append(symbols[part])
elif part:
buf.append(re.escape(part))
match = re.match(''.join(buf) + '$', filename.replace(os.sep, '/'))
return match is not None
class TextWrapper(textwrap.TextWrapper):
wordsep_re = re.compile(
r'(\s+|' # any whitespace
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))' # em-dash
)
def wraptext(text, width=70, initial_indent='', subsequent_indent=''):
"""Simple wrapper around the ``textwrap.wrap`` function in the standard
library. This version does not wrap lines on hyphens in words.
:param text: the text to wrap
:param width: the maximum line width
:param initial_indent: string that will be prepended to the first line of
wrapped output
:param subsequent_indent: string that will be prepended to all lines save
the first of wrapped output
"""
wrapper = TextWrapper(width=width, initial_indent=initial_indent,
subsequent_indent=subsequent_indent,
break_long_words=False)
return wrapper.wrap(text)
class odict(dict):
"""Ordered dict implementation.
:see: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747
"""
def __init__(self, data=None):
dict.__init__(self, data or {})
self._keys = list(dict.keys(self))
def __delitem__(self, key):
dict.__delitem__(self, key)
self._keys.remove(key)
def __setitem__(self, key, item):
new_key = key not in self
dict.__setitem__(self, key, item)
if new_key:
self._keys.append(key)
def __iter__(self):
return iter(self._keys)
iterkeys = __iter__
def clear(self):
dict.clear(self)
self._keys = []
def copy(self):
d = odict()
d.update(self)
return d
def items(self):
return zip(self._keys, self.values())
def iteritems(self):
return izip(self._keys, self.itervalues())
def keys(self):
return self._keys[:]
def pop(self, key, default=missing):
try:
value = dict.pop(self, key)
self._keys.remove(key)
return value
except KeyError as e:
if default == missing:
raise e
else:
return default
def popitem(self, key):
self._keys.remove(key)
return dict.popitem(key)
def setdefault(self, key, failobj=None):
dict.setdefault(self, key, failobj)
if key not in self._keys:
self._keys.append(key)
def update(self, dict):
for (key, val) in dict.items():
self[key] = val
def values(self):
return map(self.get, self._keys)
def itervalues(self):
return imap(self.get, self._keys)
class FixedOffsetTimezone(tzinfo):
"""Fixed offset in minutes east from UTC."""
def __init__(self, offset, name=None):
self._offset = timedelta(minutes=offset)
if name is None:
name = 'Etc/GMT%+d' % offset
self.zone = name
def __str__(self):
return self.zone
def __repr__(self):
return '<FixedOffset "%s" %s>' % (self.zone, self._offset)
def utcoffset(self, dt):
return self._offset
def tzname(self, dt):
return self.zone
def dst(self, dt):
return ZERO
# Export the localtime functionality here because that's
# where it was in the past.
UTC = _pytz.utc
LOCALTZ = localtime.LOCALTZ
get_localzone = localtime.get_localzone
STDOFFSET = localtime.STDOFFSET
DSTOFFSET = localtime.DSTOFFSET
DSTDIFF = localtime.DSTDIFF
ZERO = localtime.ZERO