# -*- coding: utf-8 -*-
#
# vim: sw=2 ts=2 sts=2
#
# Copyright 2004-2019 Mike Taylor
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""parsedatetime
Parse human-readable date/time text.
Requires Python 2.7 or later
"""
from __future__ import with_statement, absolute_import, unicode_literals
import re
import time
import logging
import warnings
import datetime
import calendar
import contextlib
import email.utils
from .pdt_locales import (locales as _locales,
get_icu, load_locale)
from .context import pdtContext, pdtContextStack
from .warns import pdt20DeprecationWarning
# Package metadata, exposed as module-level dunder attributes.
__author__ = 'Mike Taylor'
__email__ = 'bear@bear.im'
__copyright__ = 'Copyright (c) 2017 Mike Taylor'
__license__ = 'Apache License 2.0'
__version__ = '2.5'
# Project home page and distribution page.
__url__ = 'https://github.com/bear/parsedatetime'
__download_url__ = 'https://pypi.python.org/pypi/parsedatetime'
__description__ = 'Parse human-readable date/time text.'
# as a library, do *not* setup logging
# see docs.python.org/2/howto/logging.html#configuring-logging-for-a-library
# Set default logging handler to avoid "No handler found" warnings.
# Use the stdlib NullHandler when the logging module provides one
# (Python 2.7+); otherwise fall back to an equivalent no-op handler.
NullHandler = getattr(logging, 'NullHandler', None)
if NullHandler is None:
    class NullHandler(logging.Handler):
        """Handler that discards every record it receives."""

        def emit(self, record):
            pass

# Module logger; only a no-op handler is attached so that the library
# itself never configures logging output.
log = logging.getLogger(__name__)
log.addHandler(NullHandler())

# Gate for the ``debug and log.debug(...)`` calls used throughout the module.
debug = False
# Map every known locale name to its loaded locale definition.
pdtLocales = {name: load_locale(name) for name in _locales}
# Copied from feedparser.py
# Universal Feedparser
# Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
# Originally a def inside of _parse_date_w3dtf()
def _extract_date(m):
    """
    Extract a (year, month, day) tuple from a W3DTF-style regex match.

    The match must expose the named groups C{year}, C{julian}, C{month}
    and C{day}.

    @type  m: re match object
    @param m: match carrying the date groups

    @rtype:  tuple
    @return: C{(year, month, day)}; C{(0, 0, 0)} when the year is below
             1000 or the ordinal (julian) day does not exist in that year.
             Month and day default to 1 when absent.
    """
    year = int(m.group('year'))
    if year < 100:
        # two-digit year: place it in the current century
        year = 100 * (time.gmtime()[0] // 100) + year
    if year < 1000:
        return 0, 0, 0

    julian = m.group('julian')
    if julian:
        # Ordinal date (day-of-year). Resolve it with plain date arithmetic
        # so the result does not depend on the local timezone and cannot
        # loop on out-of-range values.
        ordinal = int(julian)
        daysInYear = 366 if calendar.isleap(year) else 365
        if ordinal < 1 or ordinal > daysInYear:
            return 0, 0, 0
        resolved = (datetime.date(year, 1, 1) +
                    datetime.timedelta(days=ordinal - 1))
        return year, resolved.month, resolved.day

    month = m.group('month')
    if month is None:
        return year, 1, 1

    day = m.group('day')
    return year, int(month), int(day) if day else 1
# Copied from feedparser.py
# Universal Feedparser
# Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
# Originally a def inside of _parse_date_w3dtf()
def _extract_time(m):
    """
    Extract an (hours, minutes, seconds) tuple from a regex match.

    The match must expose the named groups C{hours}, C{minutes} and
    C{seconds}. Any fractional part of the seconds (after C{.} or C{,})
    is discarded.

    @type  m: re match object or None
    @param m: match carrying the time groups

    @rtype:  tuple
    @return: C{(hours, minutes, seconds)} as integers; C{(0, 0, 0)} when
             there is no match or no hours group
    """
    midnight = (0, 0, 0)
    if not m:
        return midnight

    hourText = m.group('hours')
    if not hourText:
        return midnight

    hourValue = int(hourText)
    minuteValue = int(m.group('minutes'))

    secondText = m.group('seconds')
    if not secondText:
        return hourValue, minuteValue, 0

    # keep only the whole-second part of e.g. "15,5" or "15.5"
    wholeSeconds = secondText.replace(',', '.').split('.', 1)[0]
    return hourValue, minuteValue, int(wholeSeconds)
def _pop_time_accuracy(m, ctx):
    """
    Record on C{ctx} which time components were present in the match.

    For each of the C{hours}, C{minutes} and C{seconds} groups that
    matched non-empty text, the corresponding C{ACU_HOUR}, C{ACU_MIN} or
    C{ACU_SEC} value of C{ctx} is passed to C{ctx.updateAccuracy()}.
    Nothing happens when C{m} is falsy.
    """
    if not m:
        return

    componentFlags = (('hours', 'ACU_HOUR'),
                      ('minutes', 'ACU_MIN'),
                      ('seconds', 'ACU_SEC'))
    for groupName, flagName in componentFlags:
        if m.group(groupName):
            ctx.updateAccuracy(getattr(ctx, flagName))
# Copied from feedparser.py
# Universal Feedparser
# Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
# Modified to return a tuple instead of mktime
#
# Original comment:
# W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by
# Drake and licensed under the Python license. Removed all range checking
# for month, day, hour, minute, and second, since mktime will normalize
# these later
def __closure_parse_date_w3dtf():
    """
    Build and return the W3DTF parser.

    The regular expressions are compiled once here and captured by the
    returned function. The date and time extraction helpers live at
    module level (C{_extract_date}, C{_extract_time}) so other code can
    use them too.
    """
    datePattern = (r'(?P<year>\d\d\d\d)'
                   r'(?:(?P<dsep>-|)'
                   r'(?:(?P<julian>\d\d\d)'
                   r'|(?P<month>\d\d)(?:(?P=dsep)(?P<day>\d\d))?))?')
    tzdPattern = r'(?P<tzd>[-+](?P<tzdhours>\d\d)(?::?(?P<tzdminutes>\d\d))|Z)'
    timePattern = (r'(?P<hours>\d\d)(?P<tsep>:|)(?P<minutes>\d\d)'
                   r'(?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?' + tzdPattern)
    datetimeRx = re.compile('%s(?:T%s)?' % (datePattern, timePattern))

    # NOTE: nothing in this closure calls this helper; the parser below
    # does not apply the time zone designator to its result.
    def __extract_tzd(m):
        '''Return the Time Zone Designator as an offset in seconds from UTC.'''
        if not m:
            return 0
        designator = m.group('tzd')
        if not designator or designator == 'Z':
            return 0
        hourPart = int(m.group('tzdhours'))
        minuteText = m.group('tzdminutes')
        minutePart = int(minuteText) if minuteText else 0
        seconds = (hourPart * 60 + minutePart) * 60
        return -seconds if designator[0] == '+' else seconds

    def _parse_date_w3dtf(dateString):
        """
        Return a 9-tuple for a W3DTF string, or None when the whole
        string does not match. The last three fields are always 0.
        """
        found = datetimeRx.match(dateString)
        # reject partial matches: the pattern must consume the full text
        if found is None or found.group() != dateString:
            return None
        return _extract_date(found) + _extract_time(found) + (0, 0, 0)

    return _parse_date_w3dtf


_parse_date_w3dtf = __closure_parse_date_w3dtf()
del __closure_parse_date_w3dtf
# Lower-case English month names accepted in RFC822-style dates.
_monthnames = {
    # three-letter abbreviations
    'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
    'aug', 'sep', 'oct', 'nov', 'dec',
    # full names ('may' appears in both groups)
    'january', 'february', 'march', 'april', 'may', 'june', 'july',
    'august', 'september', 'october', 'november', 'december',
}

# Lower-case three-letter English day names.
_daynames = {'mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'}
# Copied from feedparser.py
# Universal Feedparser
# Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
# Modified to return a tuple instead of mktime
def _parse_date_rfc822(dateString):
    '''
    Parse an RFC822, RFC1123, RFC2822, or asctime-style date.

    A leading day name (or any first token ending in "," or ".") is
    dropped, a time glued to its "+" zone offset is split apart, and a
    missing time is filled in as "00:00:00 GMT" before the text is handed
    to C{email.utils.parsedate_tz()}.

    @type  dateString: string
    @param dateString: text to parse

    @rtype:  tuple or None
    @return: the value returned by C{email.utils.parsedate_tz()}; None
             when the input is empty or contains only whitespace
    '''
    data = dateString.split()
    if not data:
        # nothing to parse; indexing data[0] below would raise IndexError
        return None

    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        del data[0]

    if len(data) == 4:
        # the 4th token may be "HH:MM:SS+ZZZZ": separate time and zone
        pieces = data[3].split('+', 1)
        if len(pieces) == 2:
            data[3:] = pieces
        else:
            data.append('')
        dateString = " ".join(data)

    if len(data) < 5:
        dateString += ' 00:00:00 GMT'

    return email.utils.parsedate_tz(dateString)
# rfc822.py defines several time zones, but we define some extra ones.
# 'ET' is equivalent to 'EST', etc.
# _additional_timezones = {'AT': -400, 'ET': -500,
# 'CT': -600, 'MT': -700,
# 'PT': -800}
# email.utils._timezones.update(_additional_timezones)
# Style versions accepted by Calendar(version=...).
# Flag style is the Calendar constructor's default; constructing a Calendar
# with it emits a deprecation warning.
VERSION_FLAG_STYLE = 1
# Context style is the replacement that the deprecation warning recommends.
VERSION_CONTEXT_STYLE = 2
class Calendar(object):
"""
A collection of routines to input, parse and manipulate date and times.
The text can either be 'normal' date values or it can be human readable.
"""
def __init__(self, constants=None, version=VERSION_FLAG_STYLE):
    """
    Default constructor for the L{Calendar} class.

    @type  constants: object
    @param constants: Instance of the class L{Constants}; a default
                      L{Constants} instance is created when omitted
    @type  version:   integer
    @param version:   Default style version of current Calendar instance.
                      Valid value can be 1 (L{VERSION_FLAG_STYLE}) or
                      2 (L{VERSION_CONTEXT_STYLE}). See L{parse()}.

    @rtype:  object
    @return: L{Calendar} instance
    """
    self.ptc = Constants() if constants is None else constants
    self.version = version

    # steer callers that still use the flag style toward the context style
    if version == VERSION_FLAG_STYLE:
        deprecationNotice = (
            'Flag style will be deprecated in parsedatetime 2.0. '
            'Instead use the context style by instantiating `Calendar()` '
            'with argument `version=parsedatetime.VERSION_CONTEXT_STYLE`.')
        warnings.warn(deprecationNotice, pdt20DeprecationWarning)

    self._ctxStack = pdtContextStack()
@contextlib.contextmanager
def context(self):
    """
    Context manager that pushes a fresh L{pdtContext} for the duration
    of the C{with} block and yields it.

    On normal exit the context is popped and, when an enclosing context
    remains on the stack, merged into it via C{update()}. When the block
    raises, the context is still popped so the stack stays balanced, but
    it is not merged into the enclosing context.
    """
    ctx = pdtContext()
    self._ctxStack.push(ctx)
    completed = False
    try:
        yield ctx
        completed = True
    finally:
        # always unwind, otherwise an exception would leave a stale
        # context on top of the stack for every later parse
        finished = self._ctxStack.pop()
        if completed and not self._ctxStack.isEmpty():
            self.currentContext.update(finished)
@property
def currentContext(self):
return self._ctxStack.last()
def _convertUnitAsWords(self, unitText):
"""
Converts text units into their number value.
@type unitText: string
@param unitText: number text to convert
@rtype: integer
@return: numerical value of unitText
"""
word_list, a, b = re.split(r"[,\s-]+", unitText), 0, 0
for word in word_list:
x = self.ptc.small.get(word)
if x is not None:
a += x
elif word == "hundred":
a *= 100
else:
x = self.ptc.magnitude.get(word)
if x is not None:
b += a * x
a = 0
elif word in self.ptc.ignore:
pass
else:
raise Exception("Unknown number: " + word)
return a + b
def _buildTime(self, source, quantity, modifier, units):
    """
    Take C{quantity}, C{modifier} and C{unit} strings and convert them
    into values. After converting, calculate the time and return the
    adjusted sourceTime.

    @type  source:   time
    @param source:   time to use as the base (or source); the current
                     local time is used when None
    @type  quantity: string
    @param quantity: quantity string
    @type  modifier: string
    @param modifier: how quantity and units modify the source time
    @type  units:    string
    @param units:    unit of the quantity (i.e. hours, days, months, etc)

    @rtype:  struct_time
    @return: C{struct_time} of the calculated time
    """
    ctx = self.currentContext
    debug and log.debug('_buildTime: [%s][%s][%s]',
                        quantity, modifier, units)

    if source is None:
        source = time.localtime()

    quantity = '' if quantity is None else quantity.strip()
    qty = self._quantityToReal(quantity)

    knownModifiers = self.ptc.Modifiers
    if modifier in knownModifiers:
        qty = qty * knownModifiers[modifier]

        if units is None or units == '':
            units = 'dy'

    # plurals are handled by regex's (could be a bug tho)

    (yr, mth, dy, hr, mn, sec, _, _, _) = source
    start = datetime.datetime(yr, mth, dy, hr, mn, sec)

    # map the unit text onto its canonical unit name; keep the text
    # itself when no canonical name lists it
    realunit = next((canonical
                     for canonical, aliases in self.ptc.units.items()
                     if units in aliases), units)

    debug and log.debug('units %s --> realunit %s (qty=%s)',
                        units, realunit, qty)

    target = start
    try:
        if realunit in ('years', 'months'):
            # inc() takes the singular keyword: year= / month=
            target = self.inc(start, **{realunit[:-1]: qty})
        elif realunit in ('days', 'hours', 'minutes', 'seconds', 'weeks'):
            target = start + datetime.timedelta(**{realunit: qty})
    except OverflowError:
        # OverflowError is raised when target.year is larger than 9999;
        # the unmodified start time is returned in that case
        pass
    else:
        ctx.updateAccuracy(realunit)

    return target.timetuple()
def parseDate(self, dateString, sourceTime=None):
    """
    Parse short-form date strings::

        '05/28/2006' or '04.21'

    @type  dateString: string
    @param dateString: text to convert to a C{datetime}
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: calculated C{struct_time} value of dateString
    """
    base = time.localtime() if sourceTime is None else sourceTime
    yr, mth, dy, hr, mn, sec, wd, yd, isdst = base

    # The up-to-three numbers pulled from the text. -1 marks "absent" so
    # that zero values still pass through and fail validation below.
    first = second = third = -1
    accuracy = []

    remainder = dateString
    sep = self.ptc.CRE_DATE2.search(remainder)
    if sep is not None:
        cut = sep.start()
        first = int(remainder[:cut])
        remainder = remainder[cut + 1:]

    sep = self.ptc.CRE_DATE2.search(remainder)
    if sep is None:
        second = int(remainder.strip())
    else:
        cut = sep.start()
        second = int(remainder[:cut])
        third = int(remainder[cut + 1:])

    fields = {'m': mth, 'd': dy, 'y': yr}

    # a first number above 31 cannot be a day or month: yyyy/mm/dd format
    if first <= 31:
        order = self.ptc.dp_order
    else:
        order = ['y', 'm', 'd']

    accuracyFor = {'m': pdtContext.ACU_MONTH,
                   'd': pdtContext.ACU_DAY,
                   'y': pdtContext.ACU_YEAR}
    for position, value in enumerate((first, second, third)):
        code = order[position]
        if value >= 0:
            fields[code] = value
            accuracy.append(accuracyFor[code])

    # if the year is not specified and the date has already
    # passed, increment the year
    alreadyPassed = (mth > fields['m'] or
                     (mth == fields['m'] and dy > fields['d']))
    yr = fields['y']
    if third == -1 and alreadyPassed:
        yr += self.ptc.YearParseStyle

    mth = fields['m']
    dy = fields['d']

    # birthday epoch constraint
    if yr < self.ptc.BirthdayEpoch:
        yr += 2000
    elif yr < 100:
        yr += 1900

    daysInCurrentMonth = self.ptc.daysInMonth(mth, yr)
    debug and log.debug('parseDate: %s %s %s %s',
                        yr, mth, dy, daysInCurrentMonth)

    with self.context() as ctx:
        if 0 < mth <= 12 and 0 < dy <= daysInCurrentMonth:
            sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
            ctx.updateAccuracy(*accuracy)
        else:
            # return current time if date string is invalid
            sourceTime = time.localtime()

    return sourceTime
def parseDateText(self, dateString, sourceTime=None):
    """
    Parse long-form date strings::

        'May 31st, 2006'
        'Jan 1st'
        'July 2006'

    @type  dateString: string
    @param dateString: text to convert to a datetime
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: calculated C{struct_time} value of dateString
    """
    base = time.localtime() if sourceTime is None else sourceTime
    yr, mth, dy, hr, mn, sec, wd, yd, isdst = base

    currentMth, currentDy = mth, dy
    accuracy = []

    debug and log.debug('parseDateText currentMth %s currentDy %s',
                        mth, dy)

    found = self.ptc.CRE_DATE3.search(dateString.lower())

    mth = self.ptc.MonthOffsets[found.group('mthname')]
    accuracy.append('month')

    dayText = found.group('day')
    if dayText is None:
        dy = 1
    else:
        dy = int(dayText)
        accuracy.append('day')

    yearText = found.group('year')
    if yearText is not None:
        yr = int(yearText)
        accuracy.append('year')

        # birthday epoch constraint
        if yr < self.ptc.BirthdayEpoch:
            yr += 2000
        elif yr < 100:
            yr += 1900

    elif (mth < currentMth) or (mth == currentMth and dy < currentDy):
        # if that day and month have already passed in this year,
        # then increment the year by 1
        yr += self.ptc.YearParseStyle

    with self.context() as ctx:
        if 0 < dy <= self.ptc.daysInMonth(mth, yr):
            sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
            ctx.updateAccuracy(*accuracy)
        else:
            # Return current time if date string is invalid
            sourceTime = time.localtime()

    debug and log.debug('parseDateText returned '
                        'mth %d dy %d yr %d sourceTime %s',
                        mth, dy, yr, sourceTime)

    return sourceTime
def evalRanges(self, datetimeString, sourceTime=None):
    """
    Evaluate the C{datetimeString} text and determine if
    it represents a date or time range.

    @type  datetimeString: string
    @param datetimeString: datetime text to evaluate
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of: start datetime, end datetime and the invalid flag
             (0 = no range found or not parsable, 1 = date range,
             2 = time range)
    """
    rangeFlag = retFlag = 0
    startStr = endStr = ''
    rangeSep = self.ptc.rangeSep

    s = datetimeString.strip().lower()

    if rangeSep in s:
        # pad the separator with spaces, then collapse the doubled
        # spaces that the padding may have produced
        s = s.replace(rangeSep, ' %s ' % rangeSep)
        s = re.sub(' {2,}', ' ', s)

    for cre, rflag in [(self.ptc.CRE_TIMERNG1, 1),
                       (self.ptc.CRE_TIMERNG2, 2),
                       (self.ptc.CRE_TIMERNG4, 7),
                       (self.ptc.CRE_TIMERNG3, 3),
                       (self.ptc.CRE_DATERNG1, 4),
                       (self.ptc.CRE_DATERNG2, 5),
                       (self.ptc.CRE_DATERNG3, 6)]:
        m = cre.search(s)
        if m is not None:
            rangeFlag = rflag
            break

    debug and log.debug('evalRanges: rangeFlag = %s [%s]', rangeFlag, s)

    if m is not None:
        if m.group() != s:
            # capture remaining string
            parseStr = m.group()
            chunk1 = s[:m.start()]
            chunk2 = s[m.end():]
            s = '%s %s' % (chunk1, chunk2)

            # the text around the range may set the base time
            sourceTime, ctx = self.parse(s, sourceTime,
                                         VERSION_CONTEXT_STYLE)

            if not ctx.hasDateOrTime:
                sourceTime = None
        else:
            parseStr = s

    if not rangeFlag:
        # if range is not found
        startDT = endDT = time.localtime()
    else:
        # Split once on the separator. It is matched as a literal (as in
        # the substring test above) and the tail starts after the whole
        # separator rather than after a single character.
        sep = re.search(re.escape(rangeSep), parseStr)
        head = parseStr[:sep.start()]
        tail = parseStr[sep.end():]

        if rangeFlag in (1, 2):
            startStr = head
            endStr = tail
            retFlag = 2

        elif rangeFlag in (3, 7):
            # capturing the meridian from the end time
            if self.ptc.usesMeridian:
                ampm = re.search(self.ptc.am[0], parseStr)

                # appending the meridian to the start time
                if ampm is not None:
                    startStr = head + self.ptc.meridian[0]
                else:
                    startStr = head + self.ptc.meridian[1]
            else:
                startStr = head

            endStr = tail
            retFlag = 2

        elif rangeFlag == 4:
            startStr = head
            endStr = tail
            retFlag = 1

        elif rangeFlag == 5:
            endStr = tail

            # capturing the year from the end date
            date = self.ptc.CRE_DATE3.search(endStr)
            endYear = date.group('year')

            # appending the year to the start date if the start date
            # does not have year information and the end date does.
            # eg : "Aug 21 - Sep 4, 2007"
            if endYear is not None:
                startStr = head.strip()
                date = self.ptc.CRE_DATE3.search(startStr)
                startYear = date.group('year')

                if startYear is None:
                    startStr = startStr + ', ' + endYear
            else:
                startStr = head

            retFlag = 1

        elif rangeFlag == 6:
            startStr = head

            # capturing the month from the start date
            mth = self.ptc.CRE_DATE3.search(startStr)
            mth = mth.group('mthname')

            # appending the month name to the end date
            endStr = mth + tail

            retFlag = 1

    if retFlag:
        startDT, sctx = self.parse(startStr, sourceTime,
                                   VERSION_CONTEXT_STYLE)
        endDT, ectx = self.parse(endStr, sourceTime,
                                 VERSION_CONTEXT_STYLE)

        # both ends must parse for the range to count
        if not sctx.hasDateOrTime or not ectx.hasDateOrTime:
            retFlag = 0

    return startDT, endDT, retFlag
def _CalculateDOWDelta(self, wd, wkdy, offset, style, currentDayStyle):
    """
    Based on the C{style} and C{currentDayStyle} determine what
    day-of-week value is to be returned.

    @type  wd:              integer
    @param wd:              day-of-week value for the current day
    @type  wkdy:            integer
    @param wkdy:            day-of-week value for the parsed day
    @type  offset:          integer
    @param offset:          offset direction for any modifiers (-1, 0, 1);
                            2 means that no modifier is present
    @type  style:           integer
    @param style:           normally the value
                            set in C{Constants.DOWParseStyle}
    @type  currentDayStyle: integer
    @param currentDayStyle: normally the value
                            set in C{Constants.CurrentDOWParseStyle}

    @rtype:  integer
    @return: calculated day-of-week delta, in days
    """
    requestedOffset = offset

    if offset == 2:
        # no modifier is present.
        # i.e. string to be parsed is just DOW
        inCurrentWeek = (wkdy * style > wd * style or
                         currentDayStyle and wkdy == wd)
        if inCurrentWeek:
            offset = 0
        elif style in (-1, 1):
            # wkdy located in last (-1) or next (1) week
            offset = style
        else:
            # invalid style, or should raise error?
            offset = 0

    # offset = -1 means last week
    # offset = 0 means current week
    # offset = 1 means next week
    diff = (wkdy - wd) + 7 * offset

    # keep the result within one week in the direction of the style
    if style == 1 and diff < -7:
        diff += 7
    elif style == -1 and diff > 7:
        diff -= 7

    debug and log.debug("wd %s, wkdy %s, offset %d, "
                        "style %d, currentDayStyle %d",
                        wd, wkdy, requestedOffset, style, currentDayStyle)

    return diff
def _quantityToReal(self, quantity):
"""
Convert a quantity, either spelled-out or numeric, to a float
@type quantity: string
@param quantity: quantity to parse to float
@rtype: int
@return: the quantity as an float, defaulting to 0.0
"""
if not quantity:
return 1.0
try:
return float(quantity.replace(',', '.'))
except ValueError:
pass
try:
return float(self.ptc.numbers[quantity])
except KeyError:
pass
return 0.0
def _evalModifier(self, modifier, chunk1, chunk2, sourceTime):
    """
    Evaluate the C{modifier} string and following text (passed in
    as C{chunk1} and C{chunk2}) and if they match any known modifiers
    calculate the delta and apply it to C{sourceTime}.

    The first word of C{chunk2} is treated as the unit the modifier
    applies to (months, weeks, days, hours, years, a weekday name or a
    time); anything else is parsed as free text.

    @type  modifier:   string
    @param modifier:   modifier text to apply to sourceTime; must be a
                       key of C{self.ptc.Modifiers}
    @type  chunk1:     string
    @param chunk1:     text chunk that preceded modifier (if any)
    @type  chunk2:     string
    @param chunk2:     text chunk that followed modifier (if any)
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base; the
                       current local time is used when None

    @rtype:  tuple
    @return: tuple of: remaining text and the modified sourceTime
    """
    ctx = self.currentContext
    offset = self.ptc.Modifiers[modifier]

    if sourceTime is not None:
        (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime
    else:
        (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = time.localtime()

    # the base time as a plain tuple; valid even when sourceTime is None
    baseTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)

    if self.ptc.StartTimeFromSourceTime:
        startHour = hr
        startMinute = mn
        startSecond = sec
    else:
        startHour = self.ptc.StartHour
        startMinute = 0
        startSecond = 0

    # capture the units after the modifier and the remaining
    # string after the unit
    m = self.ptc.CRE_REMAINING.search(chunk2)
    if m is not None:
        index = m.start() + 1
        unit = chunk2[:m.start()]
        chunk2 = chunk2[index:]
    else:
        unit = chunk2
        chunk2 = ''

    debug and log.debug("modifier [%s] chunk1 [%s] "
                        "chunk2 [%s] unit [%s]",
                        modifier, chunk1, chunk2, unit)

    if unit in self.ptc.units['months']:
        currentDaysInMonth = self.ptc.daysInMonth(mth, yr)
        if offset == 0:
            dy = currentDaysInMonth
            sourceTime = (yr, mth, dy, startHour, startMinute,
                          startSecond, wd, yd, isdst)
        elif offset == 2:
            if dy == currentDaysInMonth:
                # Last day of the month maps to the last day of the next
                # month. Step forward from the 1st (always a valid day)
                # and then move to that month's final day, so no invalid
                # date such as Feb 31 is ever constructed.
                start = datetime.datetime(yr, mth, 1, startHour,
                                          startMinute, startSecond)
                target = self.inc(start, month=1)
                target = target.replace(
                    day=self.ptc.daysInMonth(target.month, target.year))
            else:
                start = datetime.datetime(yr, mth, dy, startHour,
                                          startMinute, startSecond)
                target = self.inc(start, month=1)
            sourceTime = target.timetuple()
        else:
            start = datetime.datetime(yr, mth, 1, startHour,
                                      startMinute, startSecond)
            target = self.inc(start, month=offset)
            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_MONTH)

    elif unit in self.ptc.units['weeks']:
        if offset == 0:
            # 17:00 on day-of-week 4 of the current week
            start = datetime.datetime(yr, mth, dy, 17, 0, 0)
            target = start + datetime.timedelta(days=(4 - wd))
            sourceTime = target.timetuple()
        elif offset == 2:
            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = start + datetime.timedelta(days=7)
            sourceTime = target.timetuple()
        else:
            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = start + offset * datetime.timedelta(weeks=1)
            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_WEEK)

    elif unit in self.ptc.units['days']:
        if offset == 0:
            sourceTime = (yr, mth, dy, 17, 0, 0, wd, yd, isdst)
            ctx.updateAccuracy(ctx.ACU_HALFDAY)
        elif offset == 2:
            start = datetime.datetime(yr, mth, dy, hr, mn, sec)
            target = start + datetime.timedelta(days=1)
            sourceTime = target.timetuple()
        else:
            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = start + datetime.timedelta(days=offset)
            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_DAY)

    elif unit in self.ptc.units['hours']:
        if offset == 0:
            sourceTime = (yr, mth, dy, hr, 0, 0, wd, yd, isdst)
        else:
            start = datetime.datetime(yr, mth, dy, hr, 0, 0)
            target = start + datetime.timedelta(hours=offset)
            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_HOUR)

    elif unit in self.ptc.units['years']:
        if offset == 0:
            sourceTime = (yr, 12, 31, hr, mn, sec, wd, yd, isdst)
        elif offset == 2:
            sourceTime = (yr + 1, mth, dy, hr, mn, sec, wd, yd, isdst)
        else:
            sourceTime = (yr + offset, 1, 1, startHour, startMinute,
                          startSecond, wd, yd, isdst)
        ctx.updateAccuracy(ctx.ACU_YEAR)

    elif modifier == 'eom':
        dy = self.ptc.daysInMonth(mth, yr)
        sourceTime = (yr, mth, dy, startHour, startMinute,
                      startSecond, wd, yd, isdst)
        ctx.updateAccuracy(ctx.ACU_DAY)

    elif modifier == 'eoy':
        mth = 12
        dy = self.ptc.daysInMonth(mth, yr)
        sourceTime = (yr, mth, dy, startHour, startMinute,
                      startSecond, wd, yd, isdst)
        ctx.updateAccuracy(ctx.ACU_MONTH)

    elif self.ptc.CRE_WEEKDAY.match(unit):
        m = self.ptc.CRE_WEEKDAY.match(unit)
        debug and log.debug('CRE_WEEKDAY matched')
        wkdy = m.group()

        if modifier == 'eod':
            ctx.updateAccuracy(ctx.ACU_HOUR)
            # Calculate the upcoming weekday
            sourceTime, subctx = self.parse(wkdy, sourceTime,
                                            VERSION_CONTEXT_STYLE)
            sTime = self.ptc.getSource(modifier, sourceTime)
            if sTime is not None:
                sourceTime = sTime
                ctx.updateAccuracy(ctx.ACU_HALFDAY)
        else:
            # unless one of these modifiers is being applied to the
            # day-of-week, we want to start with target as the day
            # in the current week.
            dowOffset = offset
            relativeModifier = modifier not in ['this', 'next', 'last', 'prior', 'previous']
            if relativeModifier:
                dowOffset = 0

            wkdy = self.ptc.WeekdayOffsets[wkdy]
            diff = self._CalculateDOWDelta(
                wd, wkdy, dowOffset, self.ptc.DOWParseStyle,
                self.ptc.CurrentDOWParseStyle)
            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = start + datetime.timedelta(days=diff)

            if chunk1 != '' and relativeModifier:
                # consider "one day before thursday": we need to parse chunk1 ("one day")
                # and apply according to the offset ("before"), rather than allowing the
                # remaining parse step to apply "one day" without the offset direction.
                t, subctx = self.parse(chunk1, sourceTime, VERSION_CONTEXT_STYLE)
                if subctx.hasDateOrTime:
                    # measure against baseTime: sourceTime may be None here
                    delta = time.mktime(t) - time.mktime(baseTime)
                    target = start + datetime.timedelta(days=diff) + datetime.timedelta(seconds=delta * offset)
                    chunk1 = ''

            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_DAY)

    elif chunk1 == '' and chunk2 == '' and self.ptc.CRE_TIME.match(unit):
        debug and log.debug('CRE_TIME matched')
        (yr, mth, dy, hr, mn, sec, wd, yd, isdst), subctx = \
            self.parse(unit, None, VERSION_CONTEXT_STYLE)

        start = datetime.datetime(yr, mth, dy, hr, mn, sec)
        target = start + datetime.timedelta(days=offset)
        sourceTime = target.timetuple()

    else:
        # check if the remaining text is parsable and if so,
        # use it as the base time for the modifier source time

        debug and log.debug('check for modifications '
                            'to source time [%s] [%s]',
                            chunk1, unit)

        unit = unit.strip()
        if unit:
            s = '%s %s' % (unit, chunk2)
            t, subctx = self.parse(s, sourceTime, VERSION_CONTEXT_STYLE)

            if subctx.hasDate:  # working with dates
                u = unit.lower()
                if u in self.ptc.Months or \
                   u in self.ptc.shortMonths:
                    yr, mth, dy, hr, mn, sec, wd, yd, isdst = t
                    start = datetime.datetime(
                        yr, mth, dy, hr, mn, sec)
                    t = self.inc(start, year=offset).timetuple()
                elif u in self.ptc.Weekdays:
                    # t is a time tuple, which cannot be added to a
                    # timedelta directly: shift it as a datetime
                    shifted = (datetime.datetime(*t[:6]) +
                               datetime.timedelta(weeks=offset))
                    t = shifted.timetuple()

            if subctx.hasDateOrTime:
                sourceTime = t
                chunk2 = ''

        chunk1 = chunk1.strip()

        # if the word after next is a number, the string is more than
        # likely to be "next 4 hrs" which we will have to combine the
        # units with the rest of the string
        if chunk1:
            try:
                m = list(self.ptc.CRE_NUMBER.finditer(chunk1))[-1]
            except IndexError:
                pass
            else:
                debug and log.debug('CRE_NUMBER matched')
                # fold the modifier's direction into the last number
                qty = self._quantityToReal(m.group()) * offset
                chunk1 = '%s%s%s' % (chunk1[:m.start()],
                                     qty, chunk1[m.end():])
            t, subctx = self.parse(chunk1, sourceTime,
                                   VERSION_CONTEXT_STYLE)

            chunk1 = ''

            if subctx.hasDateOrTime:
                sourceTime = t

        debug and log.debug('looking for modifier %s', modifier)
        sTime = self.ptc.getSource(modifier, sourceTime)
        if sTime is not None:
            debug and log.debug('modifier found in sources')
            sourceTime = sTime
            ctx.updateAccuracy(ctx.ACU_HALFDAY)

    debug and log.debug('returning chunk = "%s %s" and sourceTime = %s',
                        chunk1, chunk2, sourceTime)

    return '%s %s' % (chunk1, chunk2), sourceTime
def _evalDT(self, datetimeString, sourceTime):
"""
Calculate the datetime from known format like RFC822 or W3CDTF
Examples handled::
RFC822, W3CDTF formatted dates
HH:MM[:SS][ am/pm]
MM/DD/YYYY
DD MMMM YYYY
@type datetimeString: string
@param datetimeString: text to try and parse as more "traditional"
date/time text
@type sourceTime: struct_time
@param sourceTime: C{struct_time} value to use as the base
@rtype: datetime
@return: calculated C{struct_time} value or current C{struct_time}
if not parsed
"""
ctx = self.currentContext
s = datetimeString.strip()
# Given string date is a RFC822 date
if sourceTime is None:
sourceTime = _parse_date_rfc822(s)
debug and log.debug(
'attempt to parse as rfc822 - %s', str(sourceTime))
if sourceTime is not None:
(yr, mth, dy, hr, mn, sec, wd, yd, isdst, _) = sourceTime
ctx.updateAccuracy(ctx.ACU_YEAR, ctx.ACU_MONTH, ctx.ACU_DAY)
if hr != 0 and mn != 0 and sec != 0:
ctx.updateAccuracy(ctx.ACU_HOUR, ctx.ACU_MIN, ctx.ACU_SEC)
sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
# Given string date is a W3CDTF date
if sourceTime is None:
sourceTime = _parse_date_w3dtf(s)
if sourceTime is not None:
ctx.updateAccuracy(ctx.ACU_YEAR, ctx.ACU_MONTH, ctx.ACU_DAY,
ctx.ACU_HOUR, ctx.ACU_MIN, ctx.ACU_SEC)
if sourceTime is None:
sourceTime = time.localtime()
return sourceTime
def _evalUnits(self, datetimeString, sourceTime):
"""
Evaluate text passed by L{_partialParseUnits()}
"""
s = datetimeString.strip()
sourceTime = self._evalDT(datetimeString, sourceTime)
# Given string is a time string with units like "5 hrs 30 min"
modifier = '' # TODO
m = self.ptc.CRE_UNITS.search(s)
if m is not None:
units = m.group('units')
quantity = s[:m.start('units')]
sourceTime = self._buildTime(sourceTime, quantity, modifier, units)
return sourceTime
def _evalQUnits(self, datetimeString, sourceTime):
"""
Evaluate text passed by L{_partialParseQUnits()}
"""
s = datetimeString.strip()
sourceTime = self._evalDT(datetimeString, sourceTime)
# Given string is a time string with single char units like "5 h 30 m"
modifier = '' # TODO
m = self.ptc.CRE_QUNITS.search(s)
if m is not None:
units = m.group('qunits')
quantity = s[:m.start('qunits')]
sourceTime = self._buildTime(sourceTime, quantity, modifier, units)
return sourceTime
def _evalDateStr(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseDateStr()}

    Given string is in the format "May 23rd, 2005"; the work is done by
    L{parseDateText()} on top of the base time from L{_evalDT()}.
    """
    baseTime = self._evalDT(datetimeString, sourceTime)
    dateText = datetimeString.strip()

    debug and log.debug('checking for MMM DD YYYY')
    return self.parseDateText(dateText, baseTime)
def _evalDateStd(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseDateStd()}

    Given string is in the format 07/21/2006; the work is done by
    L{parseDate()} on top of the base time from L{_evalDT()}.
    """
    baseTime = self._evalDT(datetimeString, sourceTime)
    dateText = datetimeString.strip()
    return self.parseDate(dateText, baseTime)
def _evalDayStr(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseDaystr()}

    Given string is a natural language date string like today,
    tomorrow.. Its day offset is looked up in C{self.ptc.dayOffsets};
    unknown text counts as an offset of 0 days.
    """
    dayText = datetimeString.strip()
    sourceTime = self._evalDT(datetimeString, sourceTime)
    (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

    try:
        offset = self.ptc.dayOffsets[dayText]
    except KeyError:
        offset = 0

    # time of day for the result: either carried over from the source
    # time or the configured start hour on the hour
    if self.ptc.StartTimeFromSourceTime:
        startClock = (hr, mn, sec)
    else:
        startClock = (self.ptc.StartHour, 0, 0)

    self.currentContext.updateAccuracy(pdtContext.ACU_DAY)

    start = datetime.datetime(yr, mth, dy, *startClock)
    return (start + datetime.timedelta(days=offset)).timetuple()
def _evalWeekday(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseWeekday()}

    Given string is a weekday name; it must be a key of
    C{self.ptc.WeekdayOffsets}. The result is the source time moved by
    the day delta that L{_CalculateDOWDelta()} returns for an
    unmodified weekday (offset 2).

    @rtype:  struct_time
    @return: C{struct_time} of the calculated day
    """
    s = datetimeString.strip()
    sourceTime = self._evalDT(datetimeString, sourceTime)

    yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime
    start = datetime.datetime(yr, mth, dy, hr, mn, sec)
    wkdy = self.ptc.WeekdayOffsets[s]

    # The delta is computed the same way whether the weekday falls
    # before or after the current one, so no branching is needed here.
    qty = self._CalculateDOWDelta(wd, wkdy, 2,
                                  self.ptc.DOWParseStyle,
                                  self.ptc.CurrentDOWParseStyle)

    self.currentContext.updateAccuracy(pdtContext.ACU_DAY)

    target = start + datetime.timedelta(days=qty)
    return target.timetuple()
def _evalTimeStr(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseTimeStr()}

    Text listed under C{'now'} in C{self.ptc.re_values} leaves the source
    time as it is. Any other text is a natural language time string like
    lunch, midnight, etc and is resolved through C{self.ptc.getSource()}.
    """
    timeText = datetimeString.strip()
    sourceTime = self._evalDT(datetimeString, sourceTime)
    ctx = self.currentContext

    if timeText in self.ptc.re_values['now']:
        ctx.updateAccuracy(pdtContext.ACU_NOW)
        return sourceTime

    namedTime = self.ptc.getSource(timeText, sourceTime)
    if namedTime:
        sourceTime = namedTime
    ctx.updateAccuracy(pdtContext.ACU_HALFDAY)

    return sourceTime
def _evalMeridian(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseMeridian()}

    Given string is in the format HH:MM(:SS)(am/pm). The hour is
    converted to 24-hour form using the meridian.
    """
    text = datetimeString.strip()
    sourceTime = self._evalDT(datetimeString, sourceTime)
    yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime

    m = self.ptc.CRE_TIMEHMS2.search(text)
    if m is not None:
        clockText = text[:m.start('meridian')].strip()
        if len(clockText) <= 2:
            # hour only, e.g. "5pm"
            hr, mn, sec = int(clockText), 0, 0
        else:
            hr, mn, sec = _extract_time(m)

        if hr == 24:
            hr = 0

        meridian = m.group('meridian').lower()

        # if 'am' found and hour is 12 - force hour to 0 (midnight)
        if hr == 12 and meridian in self.ptc.am:
            hr = 0

        # if 'pm' found and hour < 12, add 12 to shift to evening
        if hr < 12 and meridian in self.ptc.pm:
            hr += 12

    # time validation: out-of-range values leave sourceTime untouched
    if hr >= 24 or mn >= 60 or sec >= 60:
        return sourceTime

    _pop_time_accuracy(m, self.currentContext)
    return (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
def _evalTimeStd(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseTimeStd()}

    The text is expected to look like HH:MM(:SS).

    @type  datetimeString: string
    @param datetimeString: the matched time text
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @return: a 9-item tuple carrying the parsed hour/minute/second when
             they pass validation, otherwise the value from L{_evalDT()}
    """
    text = datetimeString.strip()
    base = self._evalDT(datetimeString, sourceTime)
    yr, mth, dy, hr, mn, sec, wd, yd, isdst = base

    found = self.ptc.CRE_TIMEHMS.search(text)
    if found is not None:
        hr, mn, sec = _extract_time(found)
    if hr == 24:
        hr = 0

    # time validation
    if not (hr < 24 and mn < 60 and sec < 60):
        return base

    # NOTE(review): found is handed on even if the search failed;
    # presumably callers only pass text CRE_TIMEHMS matches - confirm
    _pop_time_accuracy(found, self.currentContext)
    return (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
def _UnitsTrapped(self, s, m, key):
    """
    Tell whether a units match actually swallowed a day suffix.

    For example "Dec 31st" would match as "31s" (31 seconds)::

        Dec 31st
            ^ ^
            | +-- m.start(key), which is also the day suffix start
            +---- m.start('qty'), which is also the day start

    @type  s:   string
    @param s:   the text that was searched
    @type  m:   match object
    @param m:   units match holding a C{qty} group and a C{key} group
    @type  key: string
    @param key: name of the units group in C{m}

    @rtype:  boolean
    @return: True if the quantity is really a day number whose suffix
             begins with the matched unit text
    """
    day = self.ptc.CRE_DAY2.search(s)
    if day is None:
        return False

    # what the quantity would read as if it were day digits + unit text
    glued = '%s%s' % (day.group('day'), m.group(key))
    return (m.start(key) == day.start('suffix') and
            m.start('qty') == day.start('day') and
            m.group('qty') == glued)
def _partialParseModifier(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_MODIFIER, used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether anything matched
    """
    # Modifier like next/prev/from/after/prior..
    found = self.ptc.CRE_MODIFIER.search(s)
    if found is None:
        return s, sourceTime, False

    phrase = found.group()
    before = after = ''
    if phrase != s:
        # keep the text on either side of the modifier
        before = s[:found.start()].strip()
        after = s[found.end():].strip()

    if phrase:
        debug and log.debug('found (modifier) [%s][%s][%s]',
                            phrase, before, after)
        s, sourceTime = self._evalModifier(phrase, before,
                                           after, sourceTime)

    return s, sourceTime, bool(phrase)
def _partialParseUnits(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_UNITS, used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether anything matched
    """
    quantity = None
    before = after = ''

    # Quantity + Units
    found = self.ptc.CRE_UNITS.search(s)
    if found is not None:
        debug and log.debug('CRE_UNITS matched')
        if self._UnitsTrapped(s, found, 'units'):
            debug and log.debug('day suffix trapped by unit match')
        elif found.group('qty') == s:
            # the whole text is the quantity
            quantity, s = s, ''
        else:
            quantity = found.group('qty')
            before = s[:found.start('qty')].strip()
            after = s[found.end('qty'):].strip()

            # a dash right before the quantity is its sign
            if before.endswith('-'):
                quantity = '-%s' % quantity
                before = before[:-1]

            s = '%s %s' % (before, after)

    if quantity:
        debug and log.debug('found (units) [%s][%s][%s]',
                            quantity, before, after)
        sourceTime = self._evalUnits(quantity, sourceTime)

    return s, sourceTime, bool(quantity)
def _partialParseQUnits(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_QUNITS, used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether anything matched
    """
    quantity = None
    before = after = ''

    # Quantity + Units
    found = self.ptc.CRE_QUNITS.search(s)
    if found is not None:
        debug and log.debug('CRE_QUNITS matched')
        if self._UnitsTrapped(s, found, 'qunits'):
            debug and log.debug(
                'day suffix trapped by qunit match')
        elif found.group('qty') == s:
            # the whole text is the quantity
            quantity, s = s, ''
        else:
            quantity = found.group('qty')
            before = s[:found.start('qty')].strip()
            after = s[found.end('qty'):].strip()

            # a dash right before the quantity is its sign
            if before.endswith('-'):
                quantity = '-%s' % quantity
                before = before[:-1]

            s = '%s %s' % (before, after)

    if quantity:
        debug and log.debug('found (qunits) [%s][%s][%s]',
                            quantity, before, after)
        sourceTime = self._evalQUnits(quantity, sourceTime)

    return s, sourceTime, bool(quantity)
def _partialParseDateStr(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_DATE3, used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether anything matched
    """
    dateText = None
    before = after = ''

    # String date format
    found = self.ptc.CRE_DATE3.search(s)
    if found is not None:
        if found.group('date') == s:
            # the whole text is the date
            dateText, s = s, ''
        else:
            begin = found.start('date')
            end = found.end('date')
            yearText = found.group('year')

            # Anything following the parsed date may be a time
            # expression whose 2 digit hour was picked up as a year:
            # "February 24th 1PM" doesn't get caught
            # "February 24th 12PM" does
            clock = self.ptc.CRE_TIMEHMS2.search(s)
            timeFollows = clock is not None and yearText is not None
            if not timeFollows:
                # "February 24th 12:00"
                clock = self.ptc.CRE_TIMEHMS.search(s)
                timeFollows = clock is not None and yearText is None

            if timeFollows:
                hoursAt = clock.start('hours')
                # end('year') is -1 when no year was captured, so this
                # only trims when a year group took part in the match
                if hoursAt < found.end('year'):
                    end = hoursAt

            dateText = s[begin:end]
            before = s[:begin]
            after = s[end:]
            s = '%s %s' % (before, after)

    if dateText:
        debug and log.debug(
            'found (date3) [%s][%s][%s]', dateText, before, after)
        sourceTime = self._evalDateStr(dateText, sourceTime)

    return s, sourceTime, bool(dateText)
def _partialParseDateStd(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_DATE, used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether anything matched
    """
    dateText = None
    before = after = ''

    # Standard date format
    found = self.ptc.CRE_DATE.search(s)
    if found is not None:
        if found.group('date') == s:
            # the whole text is the date
            dateText, s = s, ''
        else:
            dateText = found.group('date')
            before = s[:found.start('date')]
            after = s[found.end('date'):]
            s = '%s %s' % (before, after)

    if dateText:
        debug and log.debug(
            'found (date) [%s][%s][%s]', dateText, before, after)
        sourceTime = self._evalDateStd(dateText, sourceTime)

    return s, sourceTime, bool(dateText)
def _partialParseDayStr(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_DAY, used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether anything matched
    """
    dayText = None
    before = after = ''

    # Natural language day strings
    found = self.ptc.CRE_DAY.search(s)
    if found is not None:
        if found.group() == s:
            # the whole text is the day word
            dayText, s = s, ''
        else:
            dayText = found.group()
            before = s[:found.start()]
            after = s[found.end():]
            s = '%s %s' % (before, after)

    if dayText:
        debug and log.debug(
            'found (day) [%s][%s][%s]', dayText, before, after)
        sourceTime = self._evalDayStr(dayText, sourceTime)

    return s, sourceTime, bool(dayText)
def _partialParseWeekday(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_WEEKDAY, used by L{parse()}

    A matched weekday is always consumed from C{s}, but it is only
    evaluated when the current context has no date yet.

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether anything matched
    """
    parseStr = None
    chunk1 = chunk2 = ''

    ctx = self.currentContext
    # guarded by the module debug flag like every other diagnostic here,
    # so nothing is logged unless debugging was switched on
    debug and log.debug('eval %s with context - %s, %s',
                        s, ctx.hasDate, ctx.hasTime)

    # Weekday
    m = self.ptc.CRE_WEEKDAY.search(s)
    # text that is itself a day offset entry is left for other parsers
    if m is not None and s not in self.ptc.dayOffsets:
        gv = m.group()
        if gv != s:
            # capture remaining string
            parseStr = gv
            chunk1 = s[:m.start()]
            chunk2 = s[m.end():]
            s = '%s %s' % (chunk1, chunk2)
        else:
            parseStr = s
            s = ''

    if parseStr and not ctx.hasDate:
        debug and log.debug(
            'found (weekday) [%s][%s][%s]', parseStr, chunk1, chunk2)
        sourceTime = self._evalWeekday(parseStr, sourceTime)

    return s, sourceTime, bool(parseStr)
def _partialParseTimeStr(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_TIME, used by L{parse()}

    Text found in the locale's "now" values counts as a match too.

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether anything matched
    """
    # Natural language time strings
    found = self.ptc.CRE_TIME.search(s)
    if found is None and s not in self.ptc.re_values['now']:
        return s, sourceTime, False

    before = after = ''
    if found and found.group() != s:
        # keep the text on either side of the time word
        timeText = found.group()
        before = s[:found.start()]
        after = s[found.end():]
        s = '%s %s' % (before, after)
    else:
        # the whole text is the time word
        timeText, s = s, ''

    if timeText:
        debug and log.debug(
            'found (time) [%s][%s][%s]', timeText, before, after)
        sourceTime = self._evalTimeStr(timeText, sourceTime)

    return s, sourceTime, bool(timeText)
def _partialParseMeridian(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_TIMEHMS2, used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether anything matched
    """
    timeText = None
    before = after = ''

    # HH:MM(:SS) am/pm time strings
    found = self.ptc.CRE_TIMEHMS2.search(s)
    if found is not None:
        hours, minutes, seconds = found.group('hours', 'minutes',
                                              'seconds')
        # seconds are only honoured when minutes are present
        if minutes is None:
            timeText = hours
        elif seconds is None:
            timeText = '%s:%s' % (hours, minutes)
        else:
            timeText = '%s:%s:%s' % (hours, minutes, seconds)
        timeText += ' ' + found.group('meridian')

        before = s[:found.start()]
        after = s[found.end():]
        s = '%s %s' % (before, after)

    if timeText:
        debug and log.debug('found (meridian) [%s][%s][%s]',
                            timeText, before, after)
        sourceTime = self._evalMeridian(timeText, sourceTime)

    return s, sourceTime, bool(timeText)
def _partialParseTimeStd(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_TIMEHMS, used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether anything matched
    """
    timeText = None
    before = after = ''

    # HH:MM(:SS) time strings
    found = self.ptc.CRE_TIMEHMS.search(s)
    if found is not None:
        hours, minutes, seconds = found.group('hours', 'minutes',
                                              'seconds')
        if seconds is None:
            timeText = '%s:%s' % (hours, minutes)
            resumeAt = found.end('minutes')
        else:
            timeText = '%s:%s:%s' % (hours, minutes, seconds)
            resumeAt = found.end('seconds')

        # only the hours..minutes/seconds span is removed from the text
        before = s[:found.start('hours')]
        after = s[resumeAt:]
        s = '%s %s' % (before, after)

    if timeText:
        debug and log.debug(
            'found (hms) [%s][%s][%s]', timeText, before, after)
        sourceTime = self._evalTimeStd(timeText, sourceTime)

    return s, sourceTime, bool(timeText)
def parseDT(self, datetimeString, sourceTime=None,
            tzinfo=None, version=None):
    """
    Same as L{parse()}, except that C{sourceTime} may also be any object
    offering a C{timetuple()} method and the result is a C{datetime}
    instead of a C{struct_time}.

    @type  datetimeString: string
    @param datetimeString: date/time text to evaluate
    @type  sourceTime:     struct_time, datetime, date, time
    @param sourceTime:     time value to use as the base
    @type  tzinfo:         tzinfo
    @param tzinfo:         Timezone to apply to generated datetime objs.
                           It is advisable to use pytz.
    @type  version:        integer
    @param version:        style version, default will use L{Calendar}
                           parameter version value

    @rtype:  tuple
    @return: tuple of: the resulting C{datetime} and the result
             flag/context, see L{parse()} for return code details.
    """
    # Anything that can turn itself into a timetuple is converted here;
    # everything else is handed to parse() as it is.
    try:
        toTimetuple = sourceTime.timetuple
    except AttributeError:
        pass
    else:
        sourceTime = toTimetuple()

    # Prefer the timezone's own localize() (as pytz offers); without one
    # the tzinfo is simply attached. None is a valid value for that
    # fallback and yields a naive datetime.
    try:
        localize = tzinfo.localize
    except AttributeError:
        def localize(dt):
            return dt.replace(tzinfo=tzinfo)

    time_struct, ret_code = self.parse(
        datetimeString,
        sourceTime=sourceTime,
        version=version)

    # Same return signature as parse(), except that the first item is a
    # datetime object instead of a time_struct.
    naive = datetime.datetime(*time_struct[:6])
    return localize(naive), ret_code
def parse(self, datetimeString, sourceTime=None, version=None):
    """
    Splits the given C{datetimeString} into tokens, finds the regex
    patterns that match and then calculates a C{struct_time} value from
    the chunks.

    If C{sourceTime} is given then the C{struct_time} value will be
    calculated from that value, otherwise from the current date/time.

    If C{version} equals L{VERSION_CONTEXT_STYLE}, the second item of
    the returned tuple is the L{pdtContext} of this parse. Otherwise it
    is a flag telling what kind of value was parsed::

        0 = not parsed at all
        1 = parsed as a C{date}
        2 = parsed as a C{time}
        3 = parsed as a C{datetime}

    @type  datetimeString: string
    @param datetimeString: date/time text to evaluate
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base; a
                           C{datetime} or a plain tuple is accepted too
    @type  version:        integer
    @param version:        style version, default will use L{Calendar}
                           parameter version value

    @rtype:  tuple
    @return: tuple of: modified C{sourceTime} and the result flag/context
    @raise ValueError: if C{sourceTime} is truthy but is neither a
                       C{datetime}, a C{struct_time} nor a tuple
    """
    debug and log.debug('parse()')

    # Drop a period that ends a word and turn quotes that hug a word
    # into spaces before any pattern gets to see the text.
    for pattern, replacement in ((r'(\w)\.(\s)', r'\1\2'),
                                 (r'(\w)[\'"](\s|$)', r'\1 \2'),
                                 (r'(\s|^)[\'"](\w)', r'\1 \2')):
        datetimeString = re.sub(pattern, replacement, datetimeString)

    if not sourceTime:
        sourceTime = time.localtime()
    elif isinstance(sourceTime, datetime.datetime):
        debug and log.debug('coercing datetime to timetuple')
        sourceTime = sourceTime.timetuple()
    elif not isinstance(sourceTime, (time.struct_time, tuple)):
        raise ValueError('sourceTime is not a struct_time')

    with self.context() as ctx:
        remaining = datetimeString.lower().strip()
        debug and log.debug('remainedString (before parsing): [%s]',
                            remaining)

        while remaining:
            # The first parser that reports a match wins the round and
            # the next round restarts from the top of this list.
            consumed = False
            for parser in (self._partialParseModifier,
                           self._partialParseUnits,
                           self._partialParseQUnits,
                           self._partialParseDateStr,
                           self._partialParseDateStd,
                           self._partialParseDayStr,
                           self._partialParseWeekday,
                           self._partialParseTimeStr,
                           self._partialParseMeridian,
                           self._partialParseTimeStd):
                leftover, parsedTime, matched = parser(remaining,
                                                       sourceTime)
                if matched:
                    remaining = leftover.strip()
                    sourceTime = parsedTime
                    consumed = True
                    break

            if not consumed:
                # nothing matched, give up on whatever text is left
                remaining = ''

            debug and log.debug('hasDate: [%s], hasTime: [%s]',
                                ctx.hasDate, ctx.hasTime)
            debug and log.debug('remainedString: [%s]', remaining)

        # String is not parsed at all
        if sourceTime is None:
            debug and log.debug('not parsed [%s]', str(sourceTime))
            sourceTime = time.localtime()

    if not isinstance(sourceTime, time.struct_time):
        sourceTime = time.struct_time(sourceTime)

    if version is None:
        version = self.version
    if version == VERSION_CONTEXT_STYLE:
        return sourceTime, ctx
    return sourceTime, ctx.dateTimeFlag
def inc(self, source, month=None, year=None):
    """
    Takes the given C{source} date and increments it according to the
    values passed in by month and/or year.

    This routine is needed because Python's C{timedelta()} function
    does not allow for month or year increments.

    Values that cannot be converted to a float count as 0. A fractional
    month is applied as that fraction of the number of days in the
    month that was reached.

    @type  source: datetime
    @param source: value to increment; it must offer C{year}, C{month},
                   C{day} and C{replace()}
    @type  month:  float or integer
    @param month:  optional number of months to increment
    @type  year:   float or integer
    @param year:   optional number of years to increment

    @rtype:  datetime
    @return: C{source} incremented by the number of months and/or years
    @raise OverflowError: if the resulting year falls outside of
                          C{datetime.MINYEAR}..C{datetime.MAXYEAR}
    """
    newYear, newMonth, newDay = source.year, source.month, source.day

    try:
        month = float(month)
    except (TypeError, ValueError):
        month = 0

    try:
        year = float(year)
    except (TypeError, ValueError):
        year = 0
    finally:
        # years are folded into the month count
        month += year * 12
        year = 0

    fraction = 0.0
    daysInTarget = 0
    if month:
        wholeMonths = int(month)
        fraction = month - wholeMonths

        # int() truncates towards zero, so the remaining month offset
        # keeps the sign of the increment
        yearShift = int(wholeMonths / 12.0)
        newMonth += wholeMonths - yearShift * 12

        if newMonth < 1:
            # crossed the start of the year
            yearShift -= 1
            newMonth += 12
        elif newMonth > 12:
            # crossed the end of the year
            yearShift += 1
            newMonth -= 12

        newYear += yearShift

        # a day past the end of the new month becomes its last day
        daysInTarget = self.ptc.daysInMonth(newMonth, newYear)
        if newDay > daysInTarget:
            newDay = daysInTarget

    if newYear > datetime.MAXYEAR or newYear < datetime.MINYEAR:
        raise OverflowError('year is out of range')

    moved = source.replace(year=newYear, month=newMonth, day=newDay)
    if fraction:
        moved += datetime.timedelta(days=fraction * daysInTarget)
    return source + (moved - source)
def nlp(self, inputString, sourceTime=None, version=None):
    """Utilizes parse() after making judgements about what datetime
    information belongs together.

    It makes logical groupings based on proximity and returns a parsed
    datetime for each matched grouping of datetime text, along with
    location info within the given inputString.

    @type  inputString: string
    @param inputString: natural language text to evaluate
    @type  sourceTime:  struct_time
    @param sourceTime:  C{struct_time} value to use as the base
    @type  version:     integer
    @param version:     style version, default will use L{Calendar}
                        parameter version value

    @rtype:  tuple or None
    @return: tuple of tuples in the format (parsed_datetime as
             datetime.datetime, flags as returned by L{parse()},
             start_pos as int, end_pos as int, matched_text as string)
             or None if there were no matches
    """
    orig_inputstring = inputString

    # replace periods at the end of sentences w/ spaces
    # opposed to removing them altogether in order to
    # retain relative positions (identified by alpha, period, space).
    # this is required for some of the regex patterns to match
    inputString = re.sub(r'(\w)(\.)(\s)', r'\1 \3', inputString).lower()
    inputString = re.sub(r'(\w)(\'|")(\s|$)', r'\1 \3', inputString)
    inputString = re.sub(r'(\s|^)(\'|")(\w)', r'\1 \3', inputString)

    def _offer(best, begin, end, text, flag, kind):
        # Store the candidate in best when best is still empty or the
        # candidate starts further left; tell whether it was stored.
        if best[1] == 0 or best[0] > begin:
            best[:] = [begin, end, text, flag, kind]
            return True
        return False

    def _parsed(text, begin, end, shown):
        # Run parse() on text and build one entry of the result tuple.
        parsed_datetime, flags = self.parse(text, sourceTime, version)
        return (datetime.datetime(*parsed_datetime[:6]),
                flags, begin, end, shown)

    startpos = 0  # the start position in the inputString during the loop

    # list of lists in format:
    # [startpos, endpos, matchedstring, flags, type]
    # where flags is 0 for a modifier, 1 for a date, 2 for a time and
    # 3 for units
    matches = []

    while startpos < len(inputString):
        # empty match
        leftmost_match = [0, 0, None, 0, None]

        # every pattern is searched against the unscanned remainder, so
        # match offsets are relative to startpos
        tail = inputString[startpos:]

        # Modifier like next\prev..
        m = self.ptc.CRE_MODIFIER.search(tail)
        if m is not None:
            _offer(leftmost_match, m.start() + startpos,
                   m.end() + startpos, m.group(), 0, 'modifier')

        # Quantity + Units
        for regex, key, matchedMsg, trappedMsg in (
                (self.ptc.CRE_UNITS, 'units',
                 'CRE_UNITS matched',
                 'day suffix trapped by unit match'),
                (self.ptc.CRE_QUNITS, 'qunits',
                 'CRE_QUNITS matched',
                 'day suffix trapped by qunit match')):
            m = regex.search(tail)
            if m is None:
                continue
            debug and log.debug(matchedMsg)
            if self._UnitsTrapped(tail, m, key):
                debug and log.debug(trappedMsg)
                continue

            qtyAt = m.start('qty')
            if _offer(leftmost_match, qtyAt + startpos,
                      m.end('qty') + startpos, m.group('qty'), 3, key):
                # A dash right before the quantity is its sign. qtyAt is
                # an offset into tail, so tail (not the whole input) is
                # what has to be indexed here.
                if qtyAt > 0 and tail[qtyAt - 1] == '-':
                    leftmost_match[0] = leftmost_match[0] - 1
                    leftmost_match[2] = '-' + leftmost_match[2]

        # String date format, then standard date format
        for regex, kind in ((self.ptc.CRE_DATE3, 'dateStr'),
                            (self.ptc.CRE_DATE, 'dateStd')):
            m = regex.search(tail)
            if m is not None:
                _offer(leftmost_match, m.start('date') + startpos,
                       m.end('date') + startpos, m.group('date'),
                       1, kind)

        # Natural language day strings
        m = self.ptc.CRE_DAY.search(tail)
        if m is not None:
            _offer(leftmost_match, m.start() + startpos,
                   m.end() + startpos, m.group(), 1, 'dayStr')

        # Weekday
        m = self.ptc.CRE_WEEKDAY.search(tail)
        if m is not None and tail not in self.ptc.dayOffsets:
            _offer(leftmost_match, m.start() + startpos,
                   m.end() + startpos, m.group(), 1, 'weekdy')

        # Natural language time strings
        m = self.ptc.CRE_TIME.search(tail)
        if m is not None:
            _offer(leftmost_match, m.start() + startpos,
                   m.end() + startpos, m.group(), 2, 'timeStr')

        # HH:MM(:SS) am/pm time strings
        m = self.ptc.CRE_TIMEHMS2.search(tail)
        if m is not None:
            begin = m.start('hours') + startpos
            end = m.end('meridian') + startpos
            _offer(leftmost_match, begin, end,
                   inputString[begin:end], 2, 'meridian')

        # HH:MM(:SS) time strings
        m = self.ptc.CRE_TIMEHMS.search(tail)
        if m is not None:
            begin = m.start('hours') + startpos
            if m.group('seconds') is not None:
                end = m.end('seconds') + startpos
            else:
                end = m.end('minutes') + startpos
            _offer(leftmost_match, begin, end,
                   inputString[begin:end], 2, 'timeStd')

        # Units only; must be preceded by a modifier
        if len(matches) > 0 and matches[-1][3] == 0:
            m = self.ptc.CRE_UNITS_ONLY.search(tail)
            # Ensure that any match is immediately preceded by the
            # modifier. "Next is the word 'month'" should not parse as a
            # date while "next month" should
            if m is not None and tail[:m.start()].strip() == '':
                debug and log.debug('CRE_UNITS_ONLY matched [%s]',
                                    m.group())
                _offer(leftmost_match, m.start() + startpos,
                       m.end() + startpos, m.group(), 3, 'unitsOnly')

        # set the start position to the end pos of the leftmost match
        startpos = leftmost_match[1]

        if startpos == 0:
            # nothing was detected, so force the loop to end
            startpos = len(inputString)
            continue

        if leftmost_match[3] > 0:
            # let a prefix in front of the match extend it to the left
            m = self.ptc.CRE_NLP_PREFIX.search(
                inputString[:leftmost_match[0]] + ' ' +
                str(leftmost_match[3]))
            if m is not None:
                leftmost_match[0] = m.start('nlp_prefix')
                leftmost_match[2] = inputString[leftmost_match[0]:
                                                leftmost_match[1]]
        matches.append(leftmost_match)

    if len(matches) == 0:
        return None

    if len(matches) == 1:
        only = matches[0]
        if only[3] == 0:  # not enough info to parse
            return None
        # the normalized match text is parsed, the original is reported
        shown = orig_inputstring[only[0]:only[1]]
        return (_parsed(only[2], only[0], only[1], shown),)

    # find matches in proximity with one another and
    # return all the parsed values
    # TODO: make sure the combination of formats (modifier, dateStd,
    # etc) makes logical sense before parsing together
    proximity_matches = []
    from_match_index = 0
    # a group can be parsed once it holds a date, a time or units
    parseable = matches[0][3] in (1, 2, 3)

    for i in range(1, len(matches)):
        # test proximity (are there characters between matches?)
        gap = orig_inputstring[matches[i - 1][1]:matches[i][0]]
        if gap.strip() != '':
            # this one isn't in proximity, but maybe
            # we have enough to make a datetime
            if parseable:
                begin = matches[from_match_index][0]
                end = matches[i - 1][1]
                combined = orig_inputstring[begin:end]
                proximity_matches.append(
                    _parsed(combined, begin, end, combined))
            # not in proximity, reset starting from current
            from_match_index = i
            parseable = matches[i][3] in (1, 2, 3)
        elif matches[i][3] in (1, 2, 3):
            parseable = True

    # check last
    if parseable:
        begin = matches[from_match_index][0]
        end = matches[-1][1]
        combined = orig_inputstring[begin:end]
        proximity_matches.append(_parsed(combined, begin, end, combined))

    return tuple(proximity_matches)
def _initSymbols(ptc):
    """
    Initialize symbols and single character constants.

    Sets C{ptc.am} and C{ptc.pm} from the first two entries of
    C{ptc.locale.meridian}. Each list holds the meridian text followed,
    when the text is not empty, by its first character, its dotted form
    (built from its first two characters) and the same three variants
    in lowercase. An attribute without a locale entry stays C{['', '']}.

    @type  ptc: object
    @param ptc: constants object to update; it must offer
                C{locale.meridian}
    """
    ptc.am = ['', '']
    ptc.pm = ['', '']

    # the locale lists am first and pm second
    for attr, text in zip(('am', 'pm'), ptc.locale.meridian[:2]):
        variants = [text]
        setattr(ptc, attr, variants)
        if not text:
            continue

        lowered = text.lower()
        variants.extend((text[0], '{0}.{1}.'.format(*text),
                         lowered, lowered[0],
                         '{0}.{1}.'.format(*lowered)))
class Constants(object):
"""
Default set of constants for parsedatetime.
If PyICU is present, then the class will first try to get PyICU
to return a locale specified by C{localeID}. If either C{localeID} is
None or if the locale does not exist within PyICU, then each of the
locales defined in C{fallbackLocales} is tried in order.
If PyICU is not present or none of the specified locales can be used,
then the class will initialize itself to the en_US locale.
if PyICU is not present or not requested, only the locales defined by
C{pdtLocales} will be searched.
"""
def __init__(self, localeID=None, usePyICU=True,
fallbackLocales=['en_US']):
self.localeID = localeID
self.fallbackLocales = fallbackLocales[:]
if 'en_US' not in self.fallbackLocales:
self.fallbackLocales.append('en_US')
# define non-locale specific constants
self.locale = None
self.usePyICU = usePyICU
# starting cache of leap years
# daysInMonth will add to this if during
# runtime it gets a request for a year not found
self._leapYears = list(range(1904, 2097, 4))
self.Second = 1
self.Minute = 60 # 60 * self.Second
self.Hour = 3600 # 60 * self.Minute
self.Day = 86400 # 24 * self.Hour
self.Week = 604800 # 7 * self.Day
self.Month = 2592000 # 30 * self.Day
self.Year = 31536000 # 365 * self.Day
self._DaysInMonthList = (31, 28, 31, 30, 31, 30,
31, 31, 30, 31, 30, 31)
self.rangeSep = '-'
self.BirthdayEpoch = 50
# When True the starting time for all relative calculations will come
# from the given SourceTime, otherwise it will be self.StartHour
self.StartTimeFromSourceTime = False
# The hour of the day that will be used as the starting time for all
# relative calculations when self.StartTimeFromSourceTime is False
self.StartHour = 9
# YearParseStyle controls how we parse "Jun 12", i.e. dates that do
# not have a year present. The default is to compare the date given
# to the current date, and if prior, then assume the next year.
# Setting this to 0 will prevent that.
self.YearParseStyle = 1
# DOWParseStyle controls how we parse "Tuesday"
# If the current day was Thursday and the text to parse is "Tuesday"
# then the following table shows how each style would be returned
# -1, 0, +1
#
# Current day marked as ***
#
# Sun Mon Tue Wed Thu Fri Sat
# week -1
# current -1,0 ***
# week +1 +1
#
# If the current day was Monday and the text to parse is "Tuesday"
# then the following table shows how each style would be returned
# -1, 0, +1
#
# Sun Mon Tue Wed Thu Fri Sat
# week -1 -1
# current *** 0,+1
# week +1
self.DOWParseStyle = 1
# CurrentDOWParseStyle controls how we parse "Friday"
# If the current day was Friday and the text to parse is "Friday"
# then the following table shows how each style would be returned
# True/False. This also depends on DOWParseStyle.
#
# Current day marked as ***
#
# DOWParseStyle = 0
# Sun Mon Tue Wed Thu Fri Sat
# week -1
# current T,F
# week +1
#
# DOWParseStyle = -1
# Sun Mon Tue Wed Thu Fri Sat
# week -1 F
# current T
# week +1
#
# DOWParseStyle = +1
#
# Sun Mon Tue Wed Thu Fri Sat
# week -1
# current T
# week +1 F
self.CurrentDOWParseStyle = False
if self.usePyICU:
self.locale = get_icu(self.localeID)
if self.locale.icu is None:
self.usePyICU = False
self.locale = None
if self.locale is None:
if self.localeID not in pdtLocales:
for localeId in range(0, len(self.fallbackLocales)):
self.localeID = self.fallbackLocales[localeId]
if self.localeID in pdtLocales:
break
self.locale = pdtLocales[self.localeID]
if self.locale is not None:
def _getLocaleDataAdjusted(localeData):
"""
If localeData is defined as ["mon|mnd", 'tu|tues'...] then this
function splits those definitions on |
"""
adjusted = []
for d in localeData:
if '|' in d:
adjusted += d.split("|")
else:
adjusted.append(d)
return adjusted
def re_join(g):
return '|'.join(re.escape(i) for i in g)
mths = _getLocaleDataAdjusted(self.locale.Months)
smths = _getLocaleDataAdjusted(self.locale.shortMonths)
swds = _getLocaleDataAdjusted(self.locale.shortWeekdays)
wds = _getLocaleDataAdjusted(self.locale.Weekdays)
# escape any regex special characters that may be found
self.locale.re_values['months'] = re_join(mths)
self.locale.re_values['shortmonths'] = re_join(smths)
self.locale.re_values['days'] = re_join(wds)
self.locale.re_values['shortdays'] = re_join(swds)
self.locale.re_values['dayoffsets'] = \
re_join(self.locale.dayOffsets)
self.locale.re_values['numbers'] = \
re_join(self.locale.numbers)
self.locale.re_values['decimal_mark'] = \
re.escape(self.locale.decimal_mark)
units = [unit for units in self.locale.units.values()
for unit in units] # flatten
units.sort(key=len, reverse=True) # longest first
self.locale.re_values['units'] = re_join(units)
self.locale.re_values['modifiers'] = re_join(self.locale.Modifiers)
self.locale.re_values['sources'] = re_join(self.locale.re_sources)
# For distinguishing numeric dates from times, look for timeSep
# and meridian, if specified in the locale
self.locale.re_values['timecomponents'] = \
re_join(self.locale.timeSep + self.locale.meridian)
# build weekday offsets - yes, it assumes the Weekday and
# shortWeekday lists are in the same order and Mon..Sun
# (Python style)
def _buildOffsets(offsetDict, localeData, indexStart):
o = indexStart
for key in localeData:
if '|' in key:
for k in key.split('|'):
offsetDict[k] = o
else:
offsetDict[key] = o
o += 1
_buildOffsets(self.locale.WeekdayOffsets,
self.locale.Weekdays, 0)
_buildOffsets(self.locale.WeekdayOffsets,
self.locale.shortWeekdays, 0)
# build month offsets - yes, it assumes the Months and shortMonths
# lists are in the same order and Jan..Dec
_buildOffsets(self.locale.MonthOffsets,
self.locale.Months, 1)
_buildOffsets(self.locale.MonthOffsets,
self.locale.shortMonths, 1)
_initSymbols(self)
# TODO: add code to parse the date formats and build the regexes up
# from sub-parts, find all hard-coded uses of date/time separators
# not being used in code, but kept in case others are manually
# utilizing this regex for their own purposes
self.RE_DATE4 = r'''(?P<date>
(
(
(?P<day>\d\d?)
(?P<suffix>{daysuffix})?
(,)?
(\s)*
)
(?P<mthname>
\b({months}|{shortmonths})\b
)\s*
(?P<year>\d\d
(\d\d)?
)?
)
)'''.format(**self.locale.re_values)
# still not completely sure of the behavior of the regex and
# whether it would be best to consume all possible irrelevant
# characters before the option groups (but within the {1,3} repetition
# group or inside of each option group, as it currently does
# however, right now, all tests are passing that were,
# including fixing the bug of matching a 4-digit year as ddyy
# when the day is absent from the string
self.RE_DATE3 = r'''(?P<date>
(?:
(?:^|\s+)
(?P<mthname>
{months}|{shortmonths}
)\b
|
(?:^|\s+)
(?P<day>[1-9]|[012]\d|3[01])
(?P<suffix>{daysuffix}|)\b
(?!\s*(?:{timecomponents}))
|
,?\s+
(?P<year>\d\d(?:\d\d|))\b
(?!\s*(?:{timecomponents}))
){{1,3}}
(?(mthname)|$-^)
)'''.format(**self.locale.re_values)
# not being used in code, but kept in case others are manually
# utilizing this regex for their own purposes
self.RE_MONTH = r'''(\s+|^)
(?P<month>
(
(?P<mthname>
\b({months}|{shortmonths})\b
)
(\s*
(?P<year>(\d{{4}}))
)?
)
)
(?=\s+|$|[^\w])'''.format(**self.locale.re_values)
self.RE_WEEKDAY = r'''\b
(?:
{days}|{shortdays}
)
\b'''.format(**self.locale.re_values)
self.RE_NUMBER = (r'(\b(?:{numbers})\b|\d+(?:{decimal_mark}\d+|))'
.format(**self.locale.re_values))
self.RE_SPECIAL = (r'(?P<special>^[{specials}]+)\s+'
.format(**self.locale.re_values))
self.RE_UNITS_ONLY = (r'''\b({units})\b'''
.format(**self.locale.re_values))
self.RE_UNITS = r'''\b(?P<qty>
-?
(?:\d+(?:{decimal_mark}\d+|)|(?:{numbers})\b)\s*
(?P<units>{units})
)\b'''.format(**self.locale.re_values)
self.RE_QUNITS = r'''\b(?P<qty>
-?
(?:\d+(?:{decimal_mark}\d+|)|(?:{numbers})\s+)\s*
(?P<qunits>{qunits})
)\b'''.format(**self.locale.re_values)
self.RE_MODIFIER = r'''\b(?:
{modifiers}
)\b'''.format(**self.locale.re_values)
self.RE_TIMEHMS = r'''([\s(\["'-]|^)
(?P<hours>\d\d?)
(?P<tsep>{timeseparator}|)
(?P<minutes>\d\d)
(?:(?P=tsep)
(?P<seconds>\d\d
(?:[\.,]\d+)?
)
)?\b'''.format(**self.locale.re_values)
self.RE_TIMEHMS2 = r'''([\s(\["'-]|^)
(?P<hours>\d\d?)
(?:
(?P<tsep>{timeseparator}|)
(?P<minutes>\d\d?)
(?:(?P=tsep)
(?P<seconds>\d\d?
(?:[\.,]\d+)?
)
)?
)?'''.format(**self.locale.re_values)
# 1, 2, and 3 here refer to the type of match date, time, or units
self.RE_NLP_PREFIX = r'''\b(?P<nlp_prefix>
(on)
(\s)+1
|
(at|in)
(\s)+2
|
(in)
(\s)+3
)'''
if 'meridian' in self.locale.re_values:
self.RE_TIMEHMS2 += (r'\s*(?P<meridian>{meridian})\b'
.format(**self.locale.re_values))
else:
self.RE_TIMEHMS2 += r'\b'
# Always support common . and - separators
dateSeps = ''.join(re.escape(s)
for s in self.locale.dateSep + ['-', '.'])
self.RE_DATE = r'''([\s(\["'-]|^)
(?P<date>
\d\d?[{0}]\d\d?(?:[{0}]\d\d(?:\d\d)?)?
|
\d{{4}}[{0}]\d\d?[{0}]\d\d?
)
\b'''.format(dateSeps)
self.RE_DATE2 = r'[{0}]'.format(dateSeps)
assert 'dayoffsets' in self.locale.re_values
self.RE_DAY = r'''\b
(?:
{dayoffsets}
)
\b'''.format(**self.locale.re_values)
self.RE_DAY2 = r'''(?P<day>\d\d?)
(?P<suffix>{daysuffix})?
'''.format(**self.locale.re_values)
self.RE_TIME = r'''\b
(?:
{sources}
)
\b'''.format(**self.locale.re_values)
self.RE_REMAINING = r'\s+'
# Regex for date/time ranges
self.RE_RTIMEHMS = r'''(\s*|^)
(\d\d?){timeseparator}
(\d\d)
({timeseparator}(\d\d))?
(\s*|$)'''.format(**self.locale.re_values)
self.RE_RTIMEHMS2 = (r'''(\s*|^)
(\d\d?)
({timeseparator}(\d\d?))?
({timeseparator}(\d\d?))?'''
.format(**self.locale.re_values))
if 'meridian' in self.locale.re_values:
self.RE_RTIMEHMS2 += (r'\s*({meridian})'
.format(**self.locale.re_values))
self.RE_RDATE = r'(\d+([%s]\d+)+)' % dateSeps
self.RE_RDATE3 = r'''(
(
(
\b({months})\b
)\s*
(
(\d\d?)
(\s?|{daysuffix}|$)+
)?
(,\s*\d{{4}})?
)
)'''.format(**self.locale.re_values)
# "06/07/06 - 08/09/06"
self.DATERNG1 = (r'{0}\s*{rangeseparator}\s*{0}'
.format(self.RE_RDATE, **self.locale.re_values))
# "march 31 - june 1st, 2006"
self.DATERNG2 = (r'{0}\s*{rangeseparator}\s*{0}'
.format(self.RE_RDATE3, **self.locale.re_values))
# "march 1rd -13th"
self.DATERNG3 = (r'{0}\s*{rangeseparator}\s*(\d\d?)\s*(rd|st|nd|th)?'
.format(self.RE_RDATE3, **self.locale.re_values))
# "4:00:55 pm - 5:90:44 am", '4p-5p'
self.TIMERNG1 = (r'{0}\s*{rangeseparator}\s*{0}'
.format(self.RE_RTIMEHMS2, **self.locale.re_values))
self.TIMERNG2 = (r'{0}\s*{rangeseparator}\s*{0}'
.format(self.RE_RTIMEHMS, **self.locale.re_values))
# "4-5pm "
self.TIMERNG3 = (r'\d\d?\s*{rangeseparator}\s*{0}'
.format(self.RE_RTIMEHMS2, **self.locale.re_values))
# "4:30-5pm "
self.TIMERNG4 = (r'{0}\s*{rangeseparator}\s*{1}'
.format(self.RE_RTIMEHMS, self.RE_RTIMEHMS2,
**self.locale.re_values))
self.re_option = re.IGNORECASE + re.VERBOSE
self.cre_source = {'CRE_SPECIAL': self.RE_SPECIAL,
'CRE_NUMBER': self.RE_NUMBER,
'CRE_UNITS': self.RE_UNITS,
'CRE_UNITS_ONLY': self.RE_UNITS_ONLY,
'CRE_QUNITS': self.RE_QUNITS,
'CRE_MODIFIER': self.RE_MODIFIER,
'CRE_TIMEHMS': self.RE_TIMEHMS,
'CRE_TIMEHMS2': self.RE_TIMEHMS2,
'CRE_DATE': self.RE_DATE,
'CRE_DATE2': self.RE_DATE2,
'CRE_DATE3': self.RE_DATE3,
'CRE_DATE4': self.RE_DATE4,
'CRE_MONTH': self.RE_MONTH,
'CRE_WEEKDAY': self.RE_WEEKDAY,
'CRE_DAY': self.RE_DAY,
'CRE_DAY2': self.RE_DAY2,
'CRE_TIME': self.RE_TIME,
'CRE_REMAINING': self.RE_REMAINING,
'CRE_RTIMEHMS': self.RE_RTIMEHMS,
'CRE_RTIMEHMS2': self.RE_RTIMEHMS2,
'CRE_RDATE': self.RE_RDATE,
'CRE_RDATE3': self.RE_RDATE3,
'CRE_TIMERNG1': self.TIMERNG1,
'CRE_TIMERNG2': self.TIMERNG2,
'CRE_TIMERNG3': self.TIMERNG3,
'CRE_TIMERNG4': self.TIMERNG4,
'CRE_DATERNG1': self.DATERNG1,
'CRE_DATERNG2': self.DATERNG2,
'CRE_DATERNG3': self.DATERNG3,
'CRE_NLP_PREFIX': self.RE_NLP_PREFIX}
self.cre_keys = set(self.cre_source.keys())
def __getattr__(self, name):
if name in self.cre_keys:
value = re.compile(self.cre_source[name], self.re_option)
setattr(self, name, value)
return value
elif name in self.locale.locale_keys:
return getattr(self.locale, name)
else:
raise AttributeError(name)
def daysInMonth(self, month, year):
    """
    Return how many days the given month has in the given year.

    month is expected in the range 1-12; for any value outside that
    range None is returned.  The base count comes from
    self._DaysInMonthList and February gets one extra day in a leap
    year.  Years found to be leap years are remembered in
    self._leapYears.
    """
    debug and log.debug('daysInMonth(%s, %s)', month, year)

    if not 0 < month <= 12:
        return None

    days = self._DaysInMonthList[month - 1]
    if month != 2:
        return days

    # February: consult the remembered leap years first
    if year in self._leapYears:
        return days + 1
    if calendar.isleap(year):
        self._leapYears.append(year)
        return days + 1
    return days
def getSource(self, sourceKey, sourceTime=None):
    """
    Return a 9-item date/time tuple for the given source key.

    The starting point is sourceTime, or the current local time when
    sourceTime is None.  Any of the fields 'yr', 'mth', 'dy', 'hr',
    'mn' and 'sec' present in self.re_sources[sourceKey] replace the
    matching field of that starting point; the weekday, day-of-year
    and DST items are passed through untouched.

    None is returned when sourceKey is not found in self.re_sources.
    """
    if sourceKey not in self.re_sources:
        return None

    base = time.localtime() if sourceTime is None else sourceTime
    (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = base

    overrides = self.re_sources[sourceKey]
    fields = (('yr', yr), ('mth', mth), ('dy', dy),
              ('hr', hr), ('mn', mn), ('sec', sec))
    merged = tuple(overrides.get(label, current)
                   for label, current in fields)

    return merged + (wd, yd, isdst)