
    QdM                     p	   d Z 	 ddlZn# e$ r dZY nw xY w ed          ZdZdZdZdZ	g dZ
g d	Zg d
Zi dddddddddddddddddddddd d!dd"d#d$dd%dd&dd'dd(dd)d*d+dddd+ddd+dd+d,Zdd-d.dd/d0d0ddd1d2d2d3Zi d4dd$d5d5d5d5d5d5d6d7dd$d5d8d5d5d5d5d6d9dd$d5d5d5d5d5d5d6d:dd$d5d8d5d5d5d5d6d;dd$d5d5d5d8d5d5d6d<dd$d5d8d5d8d5d5d6d=dd$d5d5d5d5d5d5d6d>dd$d5d8d5d5d5d5d6d?dd$d5d5d5d5d5d5d6d@dd$d5d8d5d5d5d5d6dAdd$d5d5d5d5d5d5d6dBdd$d5d8d5d5d5d5d6dCdd$d5d5d5d5d5d5d6dDdd$d5d8d8d5d5d5d6dEdd$d5d8d8d5d5d5d6dFdd$d5d8d8d5d5d5d6dGdd$d5d8d8d8d5d5d6i dHdd$d5d8d8d5d5d5d6dIdd$d5d8d8d5d5d5d6dJdd$d5d8d8d5d5d5d6dKdd$d5d8d8d5d5d5d6dLdd'd5d8d5d5d5d5d6dMdd'd5d8d5d5d5d5d6dNdd'd5d8d5d8d5d5d6dOdd'd5d8d5d5d5d5d6dPdd'd5d8d5d5d5d5d6dQddRd5d8d5d5d5d5d6dSddRd5d8d5d8d5d5d6dTdd&d5d5d5d5d5d5d6dUdd&d5d8d5d5d5d5d6dVdd&d5d5d5d5d5d5d6dWdd&d5d8d5d5d5d5d6dXdd&d5d5d5d5d5d5d6dYdd&d5d8d5d5d5d5d6i dZdd&d5d5d5d5d5d5d6d[dd&d5d8d5d5d5d5d6d\dd&d5d5d5d5d5d5d6d]dd&d5d8d5d5d5d5d6d^dd&d5d5d5d8d5d5d6d_dd&d5d8d5d8d5d5d6d`dd&d5d5d5d5d5d5d6dadd&d5d8d5d5d5d5d6dbdd&d5d5d5d5d5d5d6dcdd&d5d8d5d5d5d5d6dddd&d5d5d5d5d5d5d6dedd&d5d8d5d5d5d5d6dfdd&d5d5d5d5d5d5d6dgdd&d5d8d5d5d5d5d6dhdd&d5d5d5d5d5d5d6didd&d5d8d5d5d5d5d6djdd&d5d5d5d5d8d5d6i dkdd&d5d8d5d5d8d5d6dlddmd5d8d5d5d5d5d6dnddmd5d8d5d5d5d5d6doddmd5d8d5d8d5d5d6dpddmd5d8d5d5d5d5d6dqddmd5d8d5d5d5d5d6drddmd5d8d5d5d8d5d6dsd!dmd5d8d5d5d5d5d6dtd"dud8d8d5d5d5dvdwd5d5d5dxdyd"dud8d8d5d5d5dvdwd8d5d5dxdzd"dud8d8d5d5d5d{dwd5d5d5dxd|d"dud8d8d5d5d5d{dwd5d8d5dxd}d"dud8d8d5d5d5d{dwd8d5d5dxd~d"dud8d8d5d5d5d{dwd5d5d5dxdd"dud8d8d5d5d5d{dwd8d5d5dxdd"dud8d8d5d5d5ddwd5d5d5dxdd"dud8d8d5d5d5ddwd5d5d5dxd"dud8d8d5d5d5ddwd5d8d5dxd"dud8d8d5d5d5dvdd5d5d5dxd"dud8d8d5d5d5dvdd8d5d5dxd"dud8d8d5d5d5d{dd5d5d5dxd"dud8d8d5d5d5dvdd8d5d5dxd"dud8d8d5d5d5dvdd8d8d5dxd"dud8d8d5d5d5d{dd8d5d5dxd"dud8d8d5d5d5d{dd8d8d5dxd"dud8d8d5d5d5d{dd8d5d5dxd"dud8d8d5d5d5ddd5d5d5dxd"dud8d8d5d5d5dvdwd5d8d5dxdZddZd Zd Zd Zd Zd Zd Zd Zd Zd ZdZedk    r e             dS dS )u  
ALINE
https://webdocs.cs.ualberta.ca/~kondrak/
Copyright 2002 by Grzegorz Kondrak.

ALINE is an algorithm for aligning phonetic sequences, described in [1].
This module is a port of Kondrak's (2002) ALINE. It provides functions for
phonetic sequence alignment and similarity analysis. These are useful in
historical linguistics, sociolinguistics and synchronic phonology.

ALINE has parameters that can be tuned for desired output. These parameters are:
- C_skip, C_sub, C_exp, C_vwl
- Salience weights
- Segmental features

In this implementation, some parameters have been changed from their default
values as described in [1], in order to replicate published results. All changes
are noted in comments.

Example usage
-------------

# Get optimal alignment of two phonetic sequences

>>> align('θin', 'tenwis') # doctest: +SKIP
[[('θ', 't'), ('i', 'e'), ('n', 'n'), ('-', 'w'), ('-', 'i'), ('-', 's')]]

[1] G. Kondrak. Algorithms for Language Reconstruction. PhD dissertation,
University of Toronto.
    Ninfi#   -      )<BNRbcdfghjklmnpqrstvxz   ç   ð   ħ   ŋ   ɖ   ɟ   ɢ   ɣ   ɦ   ɬ   ɮ   ɰ   ɱ   ɲ   ɳ   ɴ   ɸ   ɹ   ɻ   ɽ   ɾ   ʀ   ʁ   ʂ   ʃ   ʈ   ʋu   ʐ    ʒ   ʔ   ʕ   ʙ   ʝ   β   θ   χ   ʐw)	aspiratedlateralmannernasalplace	retroflexsyllabicvoice)
backrC   longrD   rE   rF   rG   roundrH   rI   bilabial      ?labiodentalgffffff?dentalg?alveolarg333333?rG   g?zpalato-alveolarg      ?palatalgffffff?velarg333333?uvularg      ?
pharyngealg333333?glottalg?
labiovelarvowelg      stop	affricate	fricativetrillg?g?g?        )tapapproximantz
high vowelz	mid vowelz	low vowelvowel2highmidlowfrontcentralrJ   plusminus(   2      
         )rH   rF   rD   rI   rE   rG   rC   rB   rK   ra   rJ   rL   r   rg   )rF   rD   rH   rI   rE   rG   rC   rB   r
   rf   r   r   r6   r!   r   r"   r   r   r   r#   r9   r   r)   r   r+   r*   r    r,   r   r;   r   r   r2   r	   r1   r^   r0   r-   r=   r   r   r>   r   r   r   r5   r8   r4   r@   r   r<   r   r$   r?   r3   r   r:   r   r%   r&   r'   r7   r_   r.   r/   r   r(   r   rA   ir`   ra   rd   )rF   rD   rH   rI   rE   rG   rC   ra   rJ   rL   rK   rB   yerb   E   øu   ɛu   œ   ærc   are   rJ   )Au   ɨu   ʉu   əuUoOu   ɔu   ɒIc                    t           t          d          d|cxk    rdk    sn J d            t          |           }t          |          }t          j        |dz   |dz   ft                    }t          d|dz             D ];}t          d|dz             D ]#}||dz
  |f         t          | |dz
                     z   }|||dz
  f         t          ||dz
                     z   }	||dz
  |dz
  f         t          | |dz
           ||dz
                     z   }
|dk    r6||dz
  |dz
  f         t          ||dz
           | |dz
  |                   z   }nt           }|dk    r6||dz
  |dz
  f         t          | |dz
           ||dz
  |                   z   }nt           }t          ||	|
||d	          |||f<   %=d|z
  t          j        |          z  }g }t          d|dz             D ]O}t          d|dz             D ]9}|||f         |k    r)|                    t          ||d	||| |g                      :P|S )
a  
    Compute the alignment of two phonetic strings.

    :param str str1: First string to be aligned
    :param str str2: Second string to be aligned

    :type epsilon: float (0.0 to 1.0)
    :param epsilon: Adjusts threshold similarity score for near-optimal alignments

    :rtype: list(list(tuple(str, str)))
    :return: Alignment(s) of str1 and str2

    (Kondrak 2002: 51)
    Nz1You need numpy in order to use the align functionr]   rN   z$Epsilon must be between 0.0 and 1.0.   )dtyperm   r   )npImportErrorlenzerosfloatrange
sigma_skip	sigma_sub	sigma_expr   maxamaxappend	_retrieve)str1str2epsilonr   r   Srn   r   edit1edit2edit3edit4edit5T
alignmentss                  2lib/python3.11/site-packages/nltk/metrics/aline.pyalignr   6  s    
zMNNN'    S     "H   D		AD		A
!a%Qu---A
 1a!e__ @ @q!a% 	@ 	@Aa!eQhK*T!a%["9"99EaQhK*T!a%["9"99Ea!eQUlOiQUT!a%[&I&IIE1uu!a%Q,)DQKa!eai*Q*QQ1uu!a%Q,)DQKa!eai*Q*QQ%ueQ??AadGG	@ 
W

"AJ1a!e__ L Lq!a% 	L 	LAAw!||!!)Aq!Q4r"J"JKKK	L     c                 6   || |f         dk    r|S |dk    r|| dz
  |dz
  f         t          || dz
           ||dz
  |                   z   |z   |k    ro|                    d|| dz
           ||dz
  |         f           t          | dz
  |dz
  |t          || dz
           ||dz
  |                   z   |||||           nW| dk    r|| dz
  |dz
  f         t          ||dz
           || dz
  |                    z   |z   |k    ro|                    d|| dz
  |          ||dz
           f           t          | dz
  |dz
  |t          ||dz
           || dz
  |                    z   |||||           n|| |dz
  f         t          ||dz
                     z   |z   |k    rU|                    dd||dz
           f           t          | |dz
  |t          ||dz
                     z   |||||           n$|| dz
  |f         t          || dz
                     z   |z   |k    rT|                    d|| dz
           df           t          | dz
  ||t          || dz
                     z   |||||           n|| dz
  |dz
  f         t	          || dz
           ||dz
                     z   |z   |k    ri|                    d|| dz
           ||dz
           f           t          | dz
  |dz
  |t	          || dz
           ||dz
                     z   |||||           |S )z
    Retrieve the path through the similarity matrix S starting at (i, j).

    :rtype: list(tuple(str, str))
    :return: Alignment of str1 and str2
    r   r|   rm   -)r   insertr   r   r   )rn   r   r   r   r   r   r   outs           r   r   r   j  s    	Aw!||
q55Qq1ua!e|_ya!ed1q519o'N'NNQRRVWWWJJq4A;QUQY8999AAId1q5k4A	?;;;	 	 	 	 EEaAq1uo	$q1u+tAEAI(O(OORSSWXXXJJq4A	?DQK8999AAId1q5k4A	?;;;	 	 	 	 q!a%x[:d1q5k222Q6!;;JJq3QU,---aQJtAE{$;$; ;Q4sSSSSq1uax[:d1q5k222Q6!;;JJq4A;,---a!eQJtAE{$;$; ;Q4sSSSSq1ua!e|_ya!ed1q5kBBBQF!KKJJq4A;QU4555AAId1q5k4A;777	 	 	 Jr   c                     t           S )zA
    Returns score of an indel of P.

    (Kondrak 2002: 54)
    )C_skipr   s    r   r   r     s	     Mr   c                 r    t           t          | |          z
  t          |           z
  t          |          z
  S )zN
    Returns score of a substitution of P with Q.

    (Kondrak 2002: 54)
    )C_subdeltaVr   r   s     r   r   r     s-     5A;;1%!,,r   c                     |d         }|d         }t           t          | |          z
  t          | |          z
  t          |           z
  t          t          |          t          |                    z
  S )zL
    Returns score of an expansion/compression.

    (Kondrak 2002: 54)
    r   r|   )C_expr   r   r   )r   r   q1q2s       r   r   r     s\     
1B	
1B5B<<%2,,.15AbEE1R558I8IIIr   c                 x    t          | |          }d}|D ]$}|t          | ||          t          |         z  z  }%|S )zT
    Return weighted sum of difference between P and Q.

    (Kondrak 2002: 54)
    r   )r	   diffsalience)r   r   featurestotalr   s        r   r   r     sI     AwwHE - -aA!,,Lr   c                     t           |          t           |         }}t          t          ||                  t          ||                  z
            S )zi
    Returns difference between phonetic segments P and Q for feature F.

    (Kondrak 2002: 52, 54)
    )feature_matrixabssimilarity_matrix)r   r   r   
p_features
q_featuress        r   r   r     s>     ,A.q0A
J A/2CJqM2RRSSSr   c                 B    | t           v s	|t           v rt          S t          S )zR
    Return relevant features for segment comparison.

    (Kondrak 2002: 54)
    )
consonantsR_cR_vr   s     r   r	   r	     s     	J!z//
Jr   c                 &    | t           v rdS t          S )zD
    Return vowel weight if P is vowel.

    (Kondrak 2002: 54)
    r   )r   C_vwlr   s    r   r   r     s     	JqLr   c                  $   d t                               d          D             } | D ]h}t          |d         |d                   d         }d |D             }d                    |          }t	          |d          d|d          d|            id	S )
zq
    A demonstration of the result of aligning phonetic sequences
    used in Kondrak's (2002) dissertation.
    c                 8    g | ]}|                     d           S ),)split).0pairs     r   
<listcomp>zdemo.<locals>.<listcomp>  s"    AAADJJsOOAAAr   
r   r|   c                 8    g | ]}d |d          d|d          dS )(r   z, r|   ) )r   rt   s     r   r   zdemo.<locals>.<listcomp>  s4    <<<Q(1((1(((<<<r    z ~ z : N)cognate_datar   r   joinprint)datar   	alignments      r   demor     s    
 BA(:(:4(@(@AAAD 6 6$q'47++A.	<<)<<<	HHY''	a44T!W44445555	6 6r   uQ  jo,ʒə
tu,ty
nosotros,nu
kjen,ki
ke,kwa
todos,tu
una,ən
dos,dø
tres,trwa
ombre,om
arbol,arbrə
pluma,plym
kabeθa,kap
boka,buʃ
pje,pje
koraθon,kœr
ber,vwar
benir,vənir
deθir,dir
pobre,povrə
ðis,dIzes
ðæt,das
wat,vas
nat,nixt
loŋ,laŋ
mæn,man
fleʃ,flajʃ
bləd,blyt
feðər,fEdər
hær,hAr
ir,Or
aj,awgə
nowz,nAzə
mawθ,munt
təŋ,tsuŋə
fut,fys
nij,knI
hænd,hant
hart,herts
livər,lEbər
ænd,ante
æt,ad
blow,flAre
ir,awris
ijt,edere
fiʃ,piʃkis
flow,fluere
staɾ,stella
ful,plenus
græs,gramen
hart,kordis
horn,korny
aj,ego
nij,genU
məðər,mAter
mawntən,mons
nejm,nomen
njuw,nowus
wən,unus
rawnd,rotundus
sow,suere
sit,sedere
θrij,tres
tuwθ,dentis
θin,tenwis
kinwawa,kenuaʔ
nina,nenah
napewa,napɛw
wapimini,wapemen
namesa,namɛʔs
okimawa,okemaw
ʃiʃipa,seʔsep
ahkohkwa,ahkɛh
pematesiweni,pematesewen
asenja,aʔsɛn__main__)r   )__doc__numpyr~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r	   r   r   r   __name__r   r   r   <module>r      s   >   	BBB
 eEll 


	= = =
@	 	 	  %% 4% c	%
 % % t% s% S% c% #% s% #% T%  C!%" #%$ %%& S'%( I% % % R   i	
 	
i 	
 	
i, 	
 	
-i@ 	
 	
AiT 		 	Uih 		 	ii| 	
 	
}iP 		 	Qid 	
 	
eix 	
 	
yiL 	
 	
Mi` 		 	ait 		 	uiH 	
 	
Ii\ 		 	]ip 	
 	
qiD 		 	Ei iX 		 	Yil 		 	mi@ 		 	AiT 	
 	
Uih 		 	ii| 	
 	
}iP 	
 	
Qid 		 	eix 	
 	
yiL 		 	Mi` 		 	ait 		 	uiH	 		 	I	i\	 	
 	
]	ip	 	
 	
q	iD
 		 	E
iX
 		 	Y
i i il
 	
 	
m
i@ 	
 	
AiT 	"	 	Uih 	"	 	ii| 		 	}iP 		 	Qid 		 	eix 		 	yiL 	
 	
Mi` 		 	ait 		 	uiH 		 	Ii\ 		 	]ip 		 	qiD 	
 	
EiX 		 	Yil 		 	mi i i@ 		 	AiT 		 	Uih 		 	ii| 		 	}iP 	
 	
Qid 		 	eix 	
 	
yiL 	
 	
Mib 
 
ci~ 
 
iZ 
 
[iv 
 
wiR 	 Sin 	 oiJ 	 Kif 	 giB 
 
Ci i` 
 
       
 
 
 
 
 
 
 
     
 
wi i i\1 1 1 1h6 6 6r  - - -J J J
 
 
T T T    
6 
6 
6JX zDFFFFF s   	 