"""
Pure-Python implementation of a Python 2-like str object for Python 3.
"""
from numbers import Integral
from past.utils import PY2, with_metaclass
if PY2:
from collections import Iterable
else:
from collections.abc import Iterable
_builtin_bytes = bytes
class BaseOldStr(type):
def __instancecheck__(cls, instance):
return isinstance(instance, _builtin_bytes)
def unescape(s):
r"""
Interprets strings with escape sequences
Example:
>>> s = unescape(r'abc\\def') # i.e. 'abc\\\\def'
>>> print(s)
'abc\def'
>>> s2 = unescape('abc\\ndef')
>>> len(s2)
8
>>> print(s2)
abc
def
"""
return s.encode().decode('unicode_escape')
class oldstr(with_metaclass(BaseOldStr, _builtin_bytes)):
"""
A forward port of the Python 2 8-bit string object to Py3
"""
# Python 2 strings have no __iter__ method:
@property
def __iter__(self):
raise AttributeError
def __dir__(self):
return [thing for thing in dir(_builtin_bytes) if thing != '__iter__']
# def __new__(cls, *args, **kwargs):
# """
# From the Py3 bytes docstring:
# bytes(iterable_of_ints) -> bytes
# bytes(string, encoding[, errors]) -> bytes
# bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer
# bytes(int) -> bytes object of size given by the parameter initialized with null bytes
# bytes() -> empty bytes object
#
# Construct an immutable array of bytes from:
# - an iterable yielding integers in range(256)
# - a text string encoded using the specified encoding
# - any object implementing the buffer API.
# - an integer
# """
#
# if len(args) == 0:
# return super(newbytes, cls).__new__(cls)
# # Was: elif isinstance(args[0], newbytes):
# # We use type() instead of the above because we're redefining
# # this to be True for all unicode string subclasses. Warning:
# # This may render newstr un-subclassable.
# elif type(args[0]) == newbytes:
# return args[0]
# elif isinstance(args[0], _builtin_bytes):
# value = args[0]
# elif isinstance(args[0], unicode):
# if 'encoding' not in kwargs:
# raise TypeError('unicode string argument without an encoding')
# ###
# # Was: value = args[0].encode(**kwargs)
# # Python 2.6 string encode() method doesn't take kwargs:
# # Use this instead:
# newargs = [kwargs['encoding']]
# if 'errors' in kwargs:
# newargs.append(kwargs['errors'])
# value = args[0].encode(*newargs)
# ###
# elif isinstance(args[0], Iterable):
# if len(args[0]) == 0:
# # What is this?
# raise ValueError('unknown argument type')
# elif len(args[0]) > 0 and isinstance(args[0][0], Integral):
# # It's a list of integers
# value = b''.join([chr(x) for x in args[0]])
# else:
# raise ValueError('item cannot be interpreted as an integer')
# elif isinstance(args[0], Integral):
# if args[0] < 0:
# raise ValueError('negative count')
# value = b'\x00' * args[0]
# else:
# value = args[0]
# return super(newbytes, cls).__new__(cls, value)
def __repr__(self):
s = super(oldstr, self).__repr__() # e.g. b'abc' on Py3, b'abc' on Py3
return s[1:]
def __str__(self):
s = super(oldstr, self).__str__() # e.g. "b'abc'" or "b'abc\\ndef'
# TODO: fix this:
assert s[:2] == "b'" and s[-1] == "'"
return unescape(s[2:-1]) # e.g. 'abc' or 'abc\ndef'
def __getitem__(self, y):
if isinstance(y, Integral):
return super(oldstr, self).__getitem__(slice(y, y+1))
else:
return super(oldstr, self).__getitem__(y)
def __getslice__(self, *args):
return self.__getitem__(slice(*args))
def __contains__(self, key):
if isinstance(key, int):
return False
def __native__(self):
return bytes(self)
__all__ = ['oldstr']