// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** * Copyright (C) 1997-2014, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** */ /** * \file * \brief C++ API: Collation Element Iterator. */ /** * File coleitr.h * * Created by: Helena Shih * * Modification History: * * Date Name Description * * 8/18/97 helena Added internal API documentation. * 08/03/98 erm Synched with 1.2 version CollationElementIterator.java * 12/10/99 aliu Ported Thai collation support from Java. * 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h) * 02/19/01 swquek Removed CollationElementsIterator() since it is * private constructor and no calls are made to it * 2012-2014 markus Rewritten in C++ again. */ #ifndef COLEITR_H #define COLEITR_H #include "unicode/utypes.h" #if U_SHOW_CPLUSPLUS_API #if !UCONFIG_NO_COLLATION #include "unicode/unistr.h" #include "unicode/uobject.h" struct UCollationElements; struct UHashtable; U_NAMESPACE_BEGIN struct CollationData; class CharacterIterator; class CollationIterator; class RuleBasedCollator; class UCollationPCE; class UVector32; /** * The CollationElementIterator class is used as an iterator to walk through * each character of an international string. Use the iterator to return the * ordering priority of the positioned character. The ordering priority of a * character, which we refer to as a key, defines how a character is collated in * the given collation object. * For example, consider the following in Slovak and in traditional Spanish collation: * <pre> * "ca" -> the first key is key('c') and second key is key('a'). * "cha" -> the first key is key('ch') and second key is key('a').</pre> * And in German phonebook collation, * <pre> \htmlonly "æb"-> the first key is key('a'), the second key is key('e'), and * the third key is key('b'). \endhtmlonly </pre> * The key of a character, is an integer composed of primary order(short), * secondary order(char), and tertiary order(char). Java strictly defines the * size and signedness of its primitive data types. Therefore, the static * functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return * int32_t to ensure the correctness of the key value. * <p>Example of the iterator usage: (without error checking) * <pre> * \code * void CollationElementIterator_Example() * { * UnicodeString str = "This is a test"; * UErrorCode success = U_ZERO_ERROR; * RuleBasedCollator* rbc = * (RuleBasedCollator*) RuleBasedCollator::createInstance(success); * CollationElementIterator* c = * rbc->createCollationElementIterator( str ); * int32_t order = c->next(success); * c->reset(); * order = c->previous(success); * delete c; * delete rbc; * } * \endcode * </pre> * <p> * The method next() returns the collation order of the next character based on * the comparison level of the collator. The method previous() returns the * collation order of the previous character based on the comparison level of * the collator. The Collation Element Iterator moves only in one direction * between calls to reset(), setOffset(), or setText(). That is, next() * and previous() can not be inter-used. Whenever previous() is to be called after * next() or vice versa, reset(), setOffset() or setText() has to be called first * to reset the status, shifting pointers to either the end or the start of * the string (reset() or setText()), or the specified position (setOffset()). * Hence at the next call of next() or previous(), the first or last collation order, * or collation order at the specified position will be returned. If a change of * direction is done without one of these calls, the result is undefined. * <p> * The result of a forward iterate (next()) and reversed result of the backward * iterate (previous()) on the same string are equivalent, if collation orders * with the value 0 are ignored. * Character based on the comparison level of the collator. A collation order * consists of primary order, secondary order and tertiary order. The data * type of the collation order is <strong>int32_t</strong>. * * Note, CollationElementIterator should not be subclassed. * @see Collator * @see RuleBasedCollator * @version 1.8 Jan 16 2001 */ class U_I18N_API CollationElementIterator final : public UObject { public: // CollationElementIterator public data member ------------------------------ enum { /** * NULLORDER indicates that an error has occurred while processing * @stable ICU 2.0 */ NULLORDER = (int32_t)0xffffffff }; // CollationElementIterator public constructor/destructor ------------------- /** * Copy constructor. * * @param other the object to be copied from * @stable ICU 2.0 */ CollationElementIterator(const CollationElementIterator& other); /** * Destructor * @stable ICU 2.0 */ virtual ~CollationElementIterator(); // CollationElementIterator public methods ---------------------------------- /** * Returns true if "other" is the same as "this" * * @param other the object to be compared * @return true if "other" is the same as "this" * @stable ICU 2.0 */ bool operator==(const CollationElementIterator& other) const; /** * Returns true if "other" is not the same as "this". * * @param other the object to be compared * @return true if "other" is not the same as "this" * @stable ICU 2.0 */ bool operator!=(const CollationElementIterator& other) const; /** * Resets the cursor to the beginning of the string. * @stable ICU 2.0 */ void reset(void); /** * Gets the ordering priority of the next character in the string. * @param status the error code status. * @return the next character's ordering. otherwise returns NULLORDER if an * error has occurred or if the end of string has been reached * @stable ICU 2.0 */ int32_t next(UErrorCode& status); /** * Get the ordering priority of the previous collation element in the string. * @param status the error code status. * @return the previous element's ordering. otherwise returns NULLORDER if an * error has occurred or if the start of string has been reached * @stable ICU 2.0 */ int32_t previous(UErrorCode& status); /** * Gets the primary order of a collation order. * @param order the collation order * @return the primary order of a collation order. * @stable ICU 2.0 */ static inline int32_t primaryOrder(int32_t order); /** * Gets the secondary order of a collation order. * @param order the collation order * @return the secondary order of a collation order. * @stable ICU 2.0 */ static inline int32_t secondaryOrder(int32_t order); /** * Gets the tertiary order of a collation order. * @param order the collation order * @return the tertiary order of a collation order. * @stable ICU 2.0 */ static inline int32_t tertiaryOrder(int32_t order); /** * Return the maximum length of any expansion sequences that end with the * specified comparison order. * @param order a collation order returned by previous or next. * @return maximum size of the expansion sequences ending with the collation * element or 1 if collation element does not occur at the end of any * expansion sequence * @stable ICU 2.0 */ int32_t getMaxExpansion(int32_t order) const; /** * Gets the comparison order in the desired strength. Ignore the other * differences. * @param order The order value * @stable ICU 2.0 */ int32_t strengthOrder(int32_t order) const; /** * Sets the source string. * @param str the source string. * @param status the error code status. * @stable ICU 2.0 */ void setText(const UnicodeString& str, UErrorCode& status); /** * Sets the source string. * @param str the source character iterator. * @param status the error code status. * @stable ICU 2.0 */ void setText(CharacterIterator& str, UErrorCode& status); /** * Checks if a comparison order is ignorable. * @param order the collation order. * @return true if a character is ignorable, false otherwise. * @stable ICU 2.0 */ static inline UBool isIgnorable(int32_t order); /** * Gets the offset of the currently processed character in the source string. * @return the offset of the character. * @stable ICU 2.0 */ int32_t getOffset(void) const; /** * Sets the offset of the currently processed character in the source string. * @param newOffset the new offset. * @param status the error code status. * @return the offset of the character. * @stable ICU 2.0 */ void setOffset(int32_t newOffset, UErrorCode& status); /** * ICU "poor man's RTTI", returns a UClassID for the actual class. * * @stable ICU 2.2 */ virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. * * @stable ICU 2.2 */ static UClassID U_EXPORT2 getStaticClassID(); #ifndef U_HIDE_INTERNAL_API /** @internal */ static inline CollationElementIterator *fromUCollationElements(UCollationElements *uc) { return reinterpret_cast<CollationElementIterator *>(uc); } /** @internal */ static inline const CollationElementIterator *fromUCollationElements(const UCollationElements *uc) { return reinterpret_cast<const CollationElementIterator *>(uc); } /** @internal */ inline UCollationElements *toUCollationElements() { return reinterpret_cast<UCollationElements *>(this); } /** @internal */ inline const UCollationElements *toUCollationElements() const { return reinterpret_cast<const UCollationElements *>(this); } #endif // U_HIDE_INTERNAL_API private: friend class RuleBasedCollator; friend class UCollationPCE; /** * CollationElementIterator constructor. This takes the source string and the * collation object. The cursor will walk thru the source string based on the * predefined collation rules. If the source string is empty, NULLORDER will * be returned on the calls to next(). * @param sourceText the source string. * @param order the collation object. * @param status the error code status. */ CollationElementIterator(const UnicodeString& sourceText, const RuleBasedCollator* order, UErrorCode& status); // Note: The constructors should take settings & tailoring, not a collator, // to avoid circular dependencies. // However, for operator==() we would need to be able to compare tailoring data for equality // without making CollationData or CollationTailoring depend on TailoredSet. // (See the implementation of RuleBasedCollator::operator==().) // That might require creating an intermediate class that would be used // by both CollationElementIterator and RuleBasedCollator // but only contain the part of RBC== related to data and rules. /** * CollationElementIterator constructor. This takes the source string and the * collation object. The cursor will walk thru the source string based on the * predefined collation rules. If the source string is empty, NULLORDER will * be returned on the calls to next(). * @param sourceText the source string. * @param order the collation object. * @param status the error code status. */ CollationElementIterator(const CharacterIterator& sourceText, const RuleBasedCollator* order, UErrorCode& status); /** * Assignment operator * * @param other the object to be copied */ const CollationElementIterator& operator=(const CollationElementIterator& other); CollationElementIterator() = delete; // default constructor not implemented /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()). */ inline int8_t normalizeDir() const { return dir_ == 1 ? 0 : dir_; } static UHashtable *computeMaxExpansions(const CollationData *data, UErrorCode &errorCode); static int32_t getMaxExpansion(const UHashtable *maxExpansions, int32_t order); // CollationElementIterator private data members ---------------------------- CollationIterator *iter_; // owned const RuleBasedCollator *rbc_; // aliased uint32_t otherHalf_; /** * <0: backwards; 0: just after reset() (previous() begins from end); * 1: just after setOffset(); >1: forward */ int8_t dir_; /** * Stores offsets from expansions and from unsafe-backwards iteration, * so that getOffset() returns intermediate offsets for the CEs * that are consistent with forward iteration. */ UVector32 *offsets_; UnicodeString string_; }; // CollationElementIterator inline method definitions -------------------------- inline int32_t CollationElementIterator::primaryOrder(int32_t order) { return (order >> 16) & 0xffff; } inline int32_t CollationElementIterator::secondaryOrder(int32_t order) { return (order >> 8) & 0xff; } inline int32_t CollationElementIterator::tertiaryOrder(int32_t order) { return order & 0xff; } inline UBool CollationElementIterator::isIgnorable(int32_t order) { return (order & 0xffff0000) == 0; } U_NAMESPACE_END #endif /* #if !UCONFIG_NO_COLLATION */ #endif /* U_SHOW_CPLUSPLUS_API */ #endif
Name | Type | Size | Permission | Actions |
---|---|---|---|---|
alphaindex.h | File | 26.54 KB | 0644 |
|
appendable.h | File | 8.54 KB | 0644 |
|
basictz.h | File | 9.99 KB | 0644 |
|
brkiter.h | File | 27.86 KB | 0644 |
|
bytestream.h | File | 10.75 KB | 0644 |
|
bytestrie.h | File | 20.8 KB | 0644 |
|
bytestriebuilder.h | File | 7.48 KB | 0644 |
|
calendar.h | File | 106.52 KB | 0644 |
|
caniter.h | File | 7.47 KB | 0644 |
|
casemap.h | File | 25.42 KB | 0644 |
|
char16ptr.h | File | 7.22 KB | 0644 |
|
chariter.h | File | 24.06 KB | 0644 |
|
choicfmt.h | File | 24 KB | 0644 |
|
coleitr.h | File | 13.78 KB | 0644 |
|
coll.h | File | 56.3 KB | 0644 |
|
compactdecimalformat.h | File | 6.88 KB | 0644 |
|
curramt.h | File | 3.67 KB | 0644 |
|
currpinf.h | File | 7.3 KB | 0644 |
|
currunit.h | File | 4.02 KB | 0644 |
|
datefmt.h | File | 40.72 KB | 0644 |
|
dbbi.h | File | 1.19 KB | 0644 |
|
dcfmtsym.h | File | 20.94 KB | 0644 |
|
decimfmt.h | File | 87.54 KB | 0644 |
|
displayoptions.h | File | 7.08 KB | 0644 |
|
docmain.h | File | 7.3 KB | 0644 |
|
dtfmtsym.h | File | 38.23 KB | 0644 |
|
dtintrv.h | File | 3.85 KB | 0644 |
|
dtitvfmt.h | File | 49.26 KB | 0644 |
|
dtitvinf.h | File | 18.63 KB | 0644 |
|
dtptngen.h | File | 28.64 KB | 0644 |
|
dtrule.h | File | 8.69 KB | 0644 |
|
edits.h | File | 20.73 KB | 0644 |
|
enumset.h | File | 2.08 KB | 0644 |
|
errorcode.h | File | 4.84 KB | 0644 |
|
fieldpos.h | File | 8.7 KB | 0644 |
|
filteredbrk.h | File | 5.37 KB | 0644 |
|
fmtable.h | File | 24.45 KB | 0644 |
|
format.h | File | 12.5 KB | 0644 |
|
formattednumber.h | File | 6.15 KB | 0644 |
|
formattedvalue.h | File | 9.75 KB | 0644 |
|
fpositer.h | File | 3.03 KB | 0644 |
|
gender.h | File | 3.35 KB | 0644 |
|
gregocal.h | File | 30.03 KB | 0644 |
|
icudataver.h | File | 1.02 KB | 0644 |
|
icuplug.h | File | 12.1 KB | 0644 |
|
idna.h | File | 12.71 KB | 0644 |
|
listformatter.h | File | 8.59 KB | 0644 |
|
localebuilder.h | File | 11.08 KB | 0644 |
|
localematcher.h | File | 26.83 KB | 0644 |
|
localpointer.h | File | 19.44 KB | 0644 |
|
locdspnm.h | File | 7.12 KB | 0644 |
|
locid.h | File | 48.27 KB | 0644 |
|
measfmt.h | File | 11.42 KB | 0644 |
|
measunit.h | File | 107.38 KB | 0644 |
|
measure.h | File | 4.69 KB | 0644 |
|
messagepattern.h | File | 33.72 KB | 0644 |
|
msgfmt.h | File | 44.21 KB | 0644 |
|
normalizer2.h | File | 34.73 KB | 0644 |
|
normlzr.h | File | 30.97 KB | 0644 |
|
nounit.h | File | 2.25 KB | 0644 |
|
numberformatter.h | File | 90.03 KB | 0644 |
|
numberrangeformatter.h | File | 25.32 KB | 0644 |
|
numfmt.h | File | 50.26 KB | 0644 |
|
numsys.h | File | 7.23 KB | 0644 |
|
parseerr.h | File | 3.08 KB | 0644 |
|
parsepos.h | File | 5.57 KB | 0644 |
|
platform.h | File | 27.8 KB | 0644 |
|
plurfmt.h | File | 25.25 KB | 0644 |
|
plurrule.h | File | 20.64 KB | 0644 |
|
ptypes.h | File | 3.49 KB | 0644 |
|
putil.h | File | 6.32 KB | 0644 |
|
rbbi.h | File | 32.07 KB | 0644 |
|
rbnf.h | File | 49.92 KB | 0644 |
|
rbtz.h | File | 15.77 KB | 0644 |
|
regex.h | File | 84.45 KB | 0644 |
|
region.h | File | 9.2 KB | 0644 |
|
reldatefmt.h | File | 22.36 KB | 0644 |
|
rep.h | File | 9.38 KB | 0644 |
|
resbund.h | File | 18.11 KB | 0644 |
|
schriter.h | File | 6.1 KB | 0644 |
|
scientificnumberformatter.h | File | 6.44 KB | 0644 |
|
search.h | File | 22.24 KB | 0644 |
|
selfmt.h | File | 14.35 KB | 0644 |
|
simpleformatter.h | File | 12.6 KB | 0644 |
|
simplenumberformatter.h | File | 8.88 KB | 0644 |
|
simpletz.h | File | 45.65 KB | 0644 |
|
smpdtfmt.h | File | 71.85 KB | 0644 |
|
sortkey.h | File | 11.19 KB | 0644 |
|
std_string.h | File | 1.05 KB | 0644 |
|
strenum.h | File | 9.96 KB | 0644 |
|
stringoptions.h | File | 5.79 KB | 0644 |
|
stringpiece.h | File | 10.05 KB | 0644 |
|
stringtriebuilder.h | File | 15.5 KB | 0644 |
|
stsearch.h | File | 21.44 KB | 0644 |
|
symtable.h | File | 4.28 KB | 0644 |
|
tblcoll.h | File | 36.94 KB | 0644 |
|
timezone.h | File | 45.67 KB | 0644 |
|
tmunit.h | File | 3.4 KB | 0644 |
|
tmutamt.h | File | 4.91 KB | 0644 |
|
tmutfmt.h | File | 7.42 KB | 0644 |
|
translit.h | File | 65.83 KB | 0644 |
|
tzfmt.h | File | 42.96 KB | 0644 |
|
tznames.h | File | 16.85 KB | 0644 |
|
tzrule.h | File | 34.86 KB | 0644 |
|
tztrans.h | File | 6.13 KB | 0644 |
|
ubidi.h | File | 89.61 KB | 0644 |
|
ubiditransform.h | File | 12.71 KB | 0644 |
|
ubrk.h | File | 24.43 KB | 0644 |
|
ucal.h | File | 64.28 KB | 0644 |
|
ucasemap.h | File | 15.21 KB | 0644 |
|
ucat.h | File | 5.35 KB | 0644 |
|
uchar.h | File | 145.7 KB | 0644 |
|
ucharstrie.h | File | 22.56 KB | 0644 |
|
ucharstriebuilder.h | File | 7.48 KB | 0644 |
|
uchriter.h | File | 13.42 KB | 0644 |
|
uclean.h | File | 11.21 KB | 0644 |
|
ucnv.h | File | 83.46 KB | 0644 |
|
ucnv_cb.h | File | 6.58 KB | 0644 |
|
ucnv_err.h | File | 20.98 KB | 0644 |
|
ucnvsel.h | File | 6.24 KB | 0644 |
|
ucol.h | File | 62.7 KB | 0644 |
|
ucoleitr.h | File | 9.82 KB | 0644 |
|
uconfig.h | File | 12.31 KB | 0644 |
|
ucpmap.h | File | 5.54 KB | 0644 |
|
ucptrie.h | File | 22.51 KB | 0644 |
|
ucsdet.h | File | 14.69 KB | 0644 |
|
ucurr.h | File | 16.72 KB | 0644 |
|
udat.h | File | 62.36 KB | 0644 |
|
udata.h | File | 15.63 KB | 0644 |
|
udateintervalformat.h | File | 11.93 KB | 0644 |
|
udatpg.h | File | 30.13 KB | 0644 |
|
udisplaycontext.h | File | 5.94 KB | 0644 |
|
udisplayoptions.h | File | 8.86 KB | 0644 |
|
uenum.h | File | 7.79 KB | 0644 |
|
ufieldpositer.h | File | 4.41 KB | 0644 |
|
uformattable.h | File | 10.97 KB | 0644 |
|
uformattednumber.h | File | 8.09 KB | 0644 |
|
uformattedvalue.h | File | 12.25 KB | 0644 |
|
ugender.h | File | 2.06 KB | 0644 |
|
uidna.h | File | 33.43 KB | 0644 |
|
uiter.h | File | 22.75 KB | 0644 |
|
uldnames.h | File | 10.48 KB | 0644 |
|
ulistformatter.h | File | 10.78 KB | 0644 |
|
uloc.h | File | 54.66 KB | 0644 |
|
ulocale.h | File | 6.35 KB | 0644 |
|
ulocbuilder.h | File | 16.72 KB | 0644 |
|
ulocdata.h | File | 11.3 KB | 0644 |
|
umachine.h | File | 15 KB | 0644 |
|
umisc.h | File | 1.34 KB | 0644 |
|
umsg.h | File | 24.25 KB | 0644 |
|
umutablecptrie.h | File | 8.3 KB | 0644 |
|
unifilt.h | File | 4 KB | 0644 |
|
unifunct.h | File | 4.05 KB | 0644 |
|
unimatch.h | File | 6.1 KB | 0644 |
|
unirepl.h | File | 3.38 KB | 0644 |
|
uniset.h | File | 66.85 KB | 0644 |
|
unistr.h | File | 171.35 KB | 0644 |
|
unorm.h | File | 20.55 KB | 0644 |
|
unorm2.h | File | 25.71 KB | 0644 |
|
unum.h | File | 55.16 KB | 0644 |
|
unumberformatter.h | File | 19.68 KB | 0644 |
|
unumberoptions.h | File | 5.23 KB | 0644 |
|
unumberrangeformatter.h | File | 15.35 KB | 0644 |
|
unumsys.h | File | 7.26 KB | 0644 |
|
uobject.h | File | 10.66 KB | 0644 |
|
upluralrules.h | File | 8.79 KB | 0644 |
|
uregex.h | File | 71.99 KB | 0644 |
|
uregion.h | File | 9.81 KB | 0644 |
|
ureldatefmt.h | File | 16.98 KB | 0644 |
|
urename.h | File | 140.82 KB | 0644 |
|
urep.h | File | 5.38 KB | 0644 |
|
ures.h | File | 36.65 KB | 0644 |
|
uscript.h | File | 27.8 KB | 0644 |
|
usearch.h | File | 39.21 KB | 0644 |
|
uset.h | File | 45.61 KB | 0644 |
|
usetiter.h | File | 9.63 KB | 0644 |
|
ushape.h | File | 18 KB | 0644 |
|
usimplenumberformatter.h | File | 7.46 KB | 0644 |
|
uspoof.h | File | 80.32 KB | 0644 |
|
usprep.h | File | 8.19 KB | 0644 |
|
ustdio.h | File | 38.56 KB | 0644 |
|
ustream.h | File | 1.89 KB | 0644 |
|
ustring.h | File | 72.13 KB | 0644 |
|
ustringtrie.h | File | 3.15 KB | 0644 |
|
utext.h | File | 58.1 KB | 0644 |
|
utf.h | File | 7.87 KB | 0644 |
|
utf16.h | File | 23.35 KB | 0644 |
|
utf32.h | File | 763 B | 0644 |
|
utf8.h | File | 30.83 KB | 0644 |
|
utf_old.h | File | 45.8 KB | 0644 |
|
utmscale.h | File | 13.78 KB | 0644 |
|
utrace.h | File | 17.18 KB | 0644 |
|
utrans.h | File | 25.54 KB | 0644 |
|
utypes.h | File | 31.06 KB | 0644 |
|
uvernum.h | File | 6.33 KB | 0644 |
|
uversion.h | File | 5.99 KB | 0644 |
|
vtzone.h | File | 20.69 KB | 0644 |
|