o
    iYJ                     @   s   d Z ddlZddlZddlmZmZmZ G dd deZdd Z	G d	d
 d
eZ
G dd deZdddZedkrCddlZe  dS dS )u  Unicode utility functions

>>> from .import unicode_util
>>> from .util import u
>>> u1 = '1'  # DIGIT ONE
>>> u2 = u('a')  # LATIN SMALL LETTER A
>>> u3 = u('２')  # FULLWIDTH DIGIT TWO
>>> u4 = u('Ā')  # LATIN CAPITAL LETTER A WITH MACRON
>>> unicode_util.Category.get(u1) == u('Nd')
True
>>> unicode_util.Category.get(u2) == u('Ll')
True
>>> unicode_util.Category.get(u3) == u('Nd')
True
>>> unicode_util.Category.get(u4) == u('Lu')
True
>>> unicode_util.Category.get(u2) == unicode_util.Category.LOWERCASE_LETTER
True
>>> try:
...     beyond_bmp = u('𐄀')  # AEGEAN WORD SEPARATOR LINE
... except Exception:
...     beyond_bmp = u('')
>>> if len(beyond_bmp) == 1:  # We have a UCS4 build of Python
...     cat_po = unicode_util.Category.get(beyond_bmp)
... else:  # UCS2 build of Python; no non-BMP chars available
...     cat_po = unicode_util.Category.OTHER_PUNCTUATION
>>> cat_po == u('Po')
True
>>> unicode_util.is_letter(u1)
False
>>> unicode_util.is_letter(u2)
True
>>> unicode_util.is_letter(u3)
False
>>> unicode_util.is_letter(u4)
True
>>> b1 = unicode_util.Block.get(u1)
>>> str(b1)
'Block[0000, 007f]'
>>> b1 == unicode_util.Block.BASIC_LATIN
True
>>> b1 == [0x0000, 0x0075]
False
>>> b2 = unicode_util.Block.get(u2)
>>> b2 == unicode_util.Block.BASIC_LATIN
True
>>> b3 = unicode_util.Block.get(u3)
>>> b3 != unicode_util.Block.BASIC_LATIN
True
>>> b3 == unicode_util.Block.HALFWIDTH_AND_FULLWIDTH_FORMS
True
>>> b4 = unicode_util.Block.get(u4)
>>> b4 == unicode_util.Block.LATIN_EXTENDED_A
True
>>> unicode_util.Block.get(u('ࡠ')) == unicode_util.Block.UNKNOWN
True
>>> try:
...     unknown_block = u('𓐰')
... except Exception:
...     unknown_block = u('')
>>> if len(unknown_block) == 1:  # We have a UCS4 build of Python
...     unicode_util.Block.get(u('𓐰')) == unicode_util.Block.UNKNOWN
... else:  # UCS2 build of Python; no unknown characters available
...     True
True
>>> unicode_util.digit(u1)
1
>>> unicode_util.digit(u2, -1)
-1
>>> unicode_util.digit(u3, -1)
2
>>> str(hash(b3))  # doctest: +ELLIPSIS
'...'
    N   )UnicodeMixinunicoduc                   @   sD  e Zd ZdZedZedZedZedZedZ	edZ
edZed	Zed
ZedZedZedZedZedZedZedZedZedZedZedZedZedZedZedZedZedZedZedZ edZ!edZ"ed Z#ed!Z$ed"Z%ed#Z&ed$Z'ed%Z(ed&Z)e*d'd( Z+d)S )*CategoryzaGeneral category of a Unicode character.

    See http://www.unicode.org/reports/tr18/#CategoriesLLuLlLtLmLoMMnMcMeNNdNlNoSSmScSkSoPPcPdPsPePiPfPoZZsZlZpCCcCfCsCoCnc                 C   s   t |}t t|S )zTReturn the general category code (as Unicode string) for the given Unicode character)r   unicodedatacategory)clsuni_char r0   y/var/www/snowflake_co_dev_github/snow_flake_back_end_deploy/env/lib/python3.10/site-packages/phonenumbers/unicode_util.pyget{   s   zCategory.getN),__name__
__module____qualname____doc__r   LETTERUPPERCASE_LETTERLOWERCASE_LETTERTITLECASE_LETTERMODIFIER_LETTEROTHER_LETTERMARKNON_SPACING_MARKSPACING_COMBINING_MARKENCLOSING_MARKNUMBERDECIMAL_DIGIT_NUMBERLETTER_NUMBEROTHER_NUMBERSYMBOLMATH_SYMBOLCURRENCY_SYMBOLMODIFIER_SYMBOLOTHER_SYMBOLPUNCTUATIONCONNECTOR_PUNCTUATIONDASH_PUNCTUATIONOPEN_PUNCTUATIONCLOSE_PUNCTUATIONINITIAL_PUNCTUATIONFINAL_PUNCTUATIONOTHER_PUNCTUATION	SEPARATORSPACE_SEPARATORLINE_SEPARATORPARAGRAPH_SEPARATOROTHERCONTROLFORMAT	SURROGATEPRIVATE_USENOT_ASSIGNEDclassmethodr2   r0   r0   r0   r1   r   Q   sR    r   c                 C   s<   t | }|t jkp|t jkp|t jkp|t jkp|t jkS )zADetermine whether the given Unicode character is a Unicode letter)r   r2   r8   r9   r:   r;   r<   )r/   r-   r0   r0   r1   	is_letter   s   

r]   c                   @   s:   e Zd ZdZdddZdd Zdd Zd	d
 Zdd ZdS )_BlockRangez?Describe the range of characters encompassed by a Unicode blockNc                 C   s$   || _ || _|d ur| ||< d S d S N)startend)selfr`   ra   regdictr0   r0   r1   __init__   s
   z_BlockRange.__init__c                 C   s&   t |tstS | j|jko| j|jkS r_   )
isinstancer^   NotImplementedr`   ra   rb   otherr0   r0   r1   __eq__   s   
z_BlockRange.__eq__c                 C   s
   | |k S r_   r0   rg   r0   r0   r1   __ne__   s   
z_BlockRange.__ne__c                 C   s   t | j| jfS r_   )hashr`   ra   rb   r0   r0   r1   __hash__   s   z_BlockRange.__hash__c                 C   s   t d| j| jf S )NzBlock[%04x, %04x])r   r`   ra   rl   r0   r0   r1   __unicode__   s   z_BlockRange.__unicode__r_   )	r3   r4   r5   r6   rd   ri   rj   rm   rn   r0   r0   r0   r1   r^      s    
r^   c                   @   sL  e Zd ZdZi ZdZeddeZeddeZeddeZ	ed	d
eZ
eddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZedd eZed!d"eZed#d$eZed%d&eZed'd(eZed)d*eZed+d,eZed-d.eZed/d0eZed1d2eZed3d4eZed5d6eZ ed7d8eZ!ed9d:eZ"ed;d<eZ#ed=d>eZ$ed?d@eZ%edAdBeZ&edCdDeZ'edEdFeZ(edGdHeZ)edIdJeZ*edKdLeZ+edMdNeZ,edOdPeZ-edQdReZ.edSdTeZ/edUdVeZ0edWdXeZ1edYdZeZ2ed[d\eZ3ed]d^eZ4ed_d`eZ5edadbeZ6edcddeZ7ededfeZ8edgdheZ9edidjeZ:edkdleZ;edmdneZ<edodpeZ=edqdreZ>edsdteZ?edudveZ@edwdxeZAedydzeZBed{d|eZCed}d~eZDeddeZEeddeZFeddeZGeddeZHeddeZIeddeZJeddeZKeddeZLeddeZMeddeZNeddeZOeddeZPeddeZQeddeZReddeZSeddeZTeddeZUeddeZVeddeZWeddeZXeddeZYeddeZZeddeZ[eddeZ\eddeZ]eddeZ^eddeZ_eddeZ`eddeZaeddeZbeddeZceddeZdeddeZeeddeZfeddeZgeddeZheddeZieddeZjeddeZkeddeZleddeZmeddeZneddeZoeddeZpeddeZqeddeZreddeZseddeZteddeZueddeZveddeZweddeZxeddeZyeddeZzeddeZ{eddeZ|eddeZ}eddeZ~eddeZeddeZeddeZeddeZeddeZeddeZedd eZeddeZeddeZeddeZeddeZed	d
eZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZedd eZed!d"eZed#d$eZed%d&eZed'd(eZed)d*eZed+d,eZed-d.eZed/d0eZed1d2eZed3d4eZed5d6eZed7d8eZed9d:eZed;d<eZed=d>eZed?d@eZedAdBeZedCdDeZedEdFeZedGdHeZedIdJeZedKdLeZedMdNeZedOdPeZedQdReZedSdTeZedUdVeZedWdXeZedYdZeZed[d\eZed]d^eZed_d`eZedadbeZedcddeZededfeZedgdheZedidjeZedkdleZedmdneZedodpeZedqdreZedsdteZedudveZedwdxeZedydzeZed{d|eZed}d~eZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddZeِdd ZdS (  Blockz*Description of the possible Unicode blocksNr               i  i  iO  iP  i  i  i  i   io  ip  i  i   i  i   i/  i0  i  i  i  i   i  i   iO  iP  i  i  i  i  i  i   i?  i@  i_  i 	  i	  i	  i	  i 
  i
  i
  i
  i   i  i  i  i   i  i  i  i   i  i  i  i   i  i  i  i   i  i   i  i  i  i   i  i   i  i  i  i  i  i   i  i  i  i  i  i   i  i   i?  i@  i_  i`  i  i  i  i   i  i  i  i   iO  iP  i  i  i  i  i  i   i  i   i  i   i  i  i  i  i  i   iO  iP  i  i  i  i   i  i  i  i  i  i   i  i   i  i    io   ip   i   i   i   i   i   i !  iO!  iP!  i!  i!  i!  i "  i"  i #  i#  i $  i?$  i@$  i_$  i`$  i$  i %  i%  i%  i%  i%  i%  i &  i&  i '  i'  i'  i'  i'  i'  i (  i(  i )  i)  i)  i)  i *  i*  i +  i+  i ,  i_,  i`,  i,  i,  i,  i -  i/-  i0-  i-  i-  i-  i-  i-  i .  i.  i.  i.  i /  i/  i/  i/  i 0  i?0  i@0  i0  i0  i0  i 1  i/1  i01  i1  i1  i1  i1  i1  i1  i1  i1  i1  i 2  i2  i 3  i3  i 4  iM  iM  iM  i N  i  i   i  i  iϤ  iФ  i  i   i?  i@  i  i  i  i   i  i   i  i   i/  i0  i?  i@  i  i  iߨ  i  i  i   i/  i0  i_  i`  i  i  iߩ  i   i_  i`  i  i  iߪ  i   i/  i  i  i   i  i  i  i   i  i  i  i   i  i   i  i   i  i   iO  iP  i  i   i  i  i  i   i/  i0  iO  iP  io  ip  i  i   i  i  i  i   i  i  i  i  i? i@ i i i i i i i i i i  i/ i0 iO i i i i i  iO iP i i i i  i? i@ i_ i 	 i	 i 	 i?	 i 
 i_
 i`
 i
 i  i? i@ i_ i` i i  iO i` i i  i i i i   i# i $ i$ i 0 i/4 i h i?j i  i i  i i  i i  iO i  i_ i` i i  i i  i/ i0 i i i i  i i  i i  i i  iO i i i  i i   iߦ i  i? i@ i i  i i   i  i  i i   i i   i c                 C   s   t |}t|}tjdu rttj t_ttj|}|dkrD|tjtj|d   j	krD|tjtj|d   j
krDtjtj|d   S |ttjk ri|tjtj|  j	kri|tjtj|  j
kritjtj|  S tjS )z7Return the Unicode block of the given Unicode characterNr   r   )r   ordro   _RANGE_KEYSsorted_RANGESkeysbisectbisect_leftr`   ra   lenUNKNOWN)r.   r/   
code_pointidxr0   r0   r1   r2   }  s   
z	Block.get)r3   r4   r5   r6   rx   rv   r^   BASIC_LATINLATIN_1_SUPPLEMENTLATIN_EXTENDED_ALATIN_EXTENDED_BIPA_EXTENSIONSSPACING_MODIFIER_LETTERSCOMBINING_DIACRITICAL_MARKSGREEK_AND_COPTICCYRILLICCYRILLIC_SUPPLEMENTARMENIANHEBREWARABICSYRIACARABIC_SUPPLEMENTTHAANANKO	SAMARITANMANDAIC
DEVANAGARIBENGALIGURMUKHIGUJARATIORIYATAMILTELUGUKANNADA	MALAYALAMSINHALATHAILAOTIBETANMYANMARGEORGIANHANGUL_JAMOETHIOPICETHIOPIC_SUPPLEMENTCHEROKEE%UNIFIED_CANADIAN_ABORIGINAL_SYLLABICSOGHAMRUNICTAGALOGHANUNOOBUHIDTAGBANWAKHMER	MONGOLIAN.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDEDLIMBUTAI_LENEW_TAI_LUEKHMER_SYMBOLSBUGINESETAI_THAMBALINESE	SUNDANESEBATAKLEPCHAOL_CHIKIVEDIC_EXTENSIONSPHONETIC_EXTENSIONSPHONETIC_EXTENSIONS_SUPPLEMENT&COMBINING_DIACRITICAL_MARKS_SUPPLEMENTLATIN_EXTENDED_ADDITIONALGREEK_EXTENDEDGENERAL_PUNCTUATIONSUPERSCRIPTS_AND_SUBSCRIPTSCURRENCY_SYMBOLS'COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLSLETTERLIKE_SYMBOLSNUMBER_FORMSARROWSMATHEMATICAL_OPERATORSMISCELLANEOUS_TECHNICALCONTROL_PICTURESOPTICAL_CHARACTER_RECOGNITIONENCLOSED_ALPHANUMERICSBOX_DRAWINGBLOCK_ELEMENTSGEOMETRIC_SHAPESMISCELLANEOUS_SYMBOLSDINGBATS$MISCELLANEOUS_MATHEMATICAL_SYMBOLS_ASUPPLEMENTAL_ARROWS_ABRAILLE_PATTERNSSUPPLEMENTAL_ARROWS_B$MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B#SUPPLEMENTAL_MATHEMATICAL_OPERATORS MISCELLANEOUS_SYMBOLS_AND_ARROWS
GLAGOLITICLATIN_EXTENDED_CCOPTICGEORGIAN_SUPPLEMENTTIFINAGHETHIOPIC_EXTENDEDCYRILLIC_EXTENDED_ASUPPLEMENTAL_PUNCTUATIONCJK_RADICALS_SUPPLEMENTKANGXI_RADICALS"IDEOGRAPHIC_DESCRIPTION_CHARACTERSCJK_SYMBOLS_AND_PUNCTUATIONHIRAGANAKATAKANABOPOMOFOHANGUL_COMPATIBILITY_JAMOKANBUNBOPOMOFO_EXTENDEDCJK_STROKESKATAKANA_PHONETIC_EXTENSIONSENCLOSED_CJK_LETTERS_AND_MONTHSCJK_COMPATIBILITY"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_AYIJING_HEXAGRAM_SYMBOLSCJK_UNIFIED_IDEOGRAPHSYI_SYLLABLESYI_RADICALSLISUVAICYRILLIC_EXTENDED_BBAMUMMODIFIER_TONE_LETTERSLATIN_EXTENDED_DSYLOTI_NAGRICOMMON_INDIC_NUMBER_FORMSPHAGS_PA
SAURASHTRADEVANAGARI_EXTENDEDKAYAH_LIREJANGHANGUL_JAMO_EXTENDED_AJAVANESECHAMMYANMAR_EXTENDED_ATAI_VIETETHIOPIC_EXTENDED_AMEETEI_MAYEKHANGUL_SYLLABLESHANGUL_JAMO_EXTENDED_BHIGH_SURROGATESHIGH_PRIVATE_USE_SURROGATESLOW_SURROGATESPRIVATE_USE_AREACJK_COMPATIBILITY_IDEOGRAPHSALPHABETIC_PRESENTATION_FORMSARABIC_PRESENTATION_FORMS_AVARIATION_SELECTORSVERTICAL_FORMSCOMBINING_HALF_MARKSCJK_COMPATIBILITY_FORMSSMALL_FORM_VARIANTSARABIC_PRESENTATION_FORMS_BHALFWIDTH_AND_FULLWIDTH_FORMSSPECIALSLINEAR_B_SYLLABARYLINEAR_B_IDEOGRAMSAEGEAN_NUMBERSANCIENT_GREEK_NUMBERSANCIENT_SYMBOLSPHAISTOS_DISCLYCIANCARIAN
OLD_ITALICGOTHICUGARITICOLD_PERSIANDESERETSHAVIANOSMANYACYPRIOT_SYLLABARYIMPERIAL_ARAMAIC
PHOENICIANLYDIAN
KHAROSHTHIOLD_SOUTH_ARABIANAVESTANINSCRIPTIONAL_PARTHIANINSCRIPTIONAL_PAHLAVI
OLD_TURKICRUMI_NUMERAL_SYMBOLSBRAHMIKAITHI	CUNEIFORM!CUNEIFORM_NUMBERS_AND_PUNCTUATIONEGYPTIAN_HIEROGLYPHSBAMUM_SUPPLEMENTKANA_SUPPLEMENTBYZANTINE_MUSICAL_SYMBOLSMUSICAL_SYMBOLSANCIENT_GREEK_MUSICAL_NOTATIONTAI_XUAN_JING_SYMBOLSCOUNTING_ROD_NUMERALS!MATHEMATICAL_ALPHANUMERIC_SYMBOLSMAHJONG_TILESDOMINO_TILESPLAYING_CARDS ENCLOSED_ALPHANUMERIC_SUPPLEMENTENCLOSED_IDEOGRAPHIC_SUPPLEMENT%MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS	EMOTICONSTRANSPORT_AND_MAP_SYMBOLSALCHEMICAL_SYMBOLS"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D'CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENTTAGSVARIATION_SELECTORS_SUPPLEMENT SUPPLEMENTARY_PRIVATE_USE_AREA_A SUPPLEMENTARY_PRIVATE_USE_AREA_Br}   r\   r2   r0   r0   r0   r1   ro      s   ro   c                 C   s&   t | } |durt| |S t| S )zReturns the digit value assigned to the Unicode character uni_char as
    integer. If no such value is defined, default is returned, or, if not
    given, ValueError is raised.N)r   r,   digit)r/   default_valuer0   r0   r1   rQ    s   
rQ  __main__r_   )r6   rz   r,   utilr   r   r   objectr   r]   r^   ro   rQ  r3   doctesttestmodr0   r0   r0   r1   <module>   s    J1
 
o