
    Qd"                         d Z ddlZ	 ddlmZ ddlmZ n# e$ r d Zd Zd ZY nw xY w ej	        d          Z
 G d	 d
          Zd Zd ZefdZefdZd Zd ZdS )z

A port of the Gale-Church Aligner.

Gale & Church (1993), A Program for Aligning Sentences in Bilingual Corpora.
https://aclweb.org/anthology/J93-1004.pdf

    N)logsf)normc                     t          |           }ddd|z  z   z  }|t          j        | |z  dz
  |d|d|d|d|d|d	|d
|d|dz  z   z  z   z  z   z  z   z  z   z  z   z  z   z  z   z  z             z  }| dk    r|S d|z
  S )zComplementary error function.         ?gś??g5 ?g`yg?gƸ?gꪂIǿg#v?g9)gS?gޅ1Ogv(?g        g       @)absmathexp)xztrs       :lib/python3.11/site-packages/nltk/translate/gale_church.pyerfccr      s    FFS1WBF"' *"#$/&''1Aq:~9U4V'V'X%X#"!"	

 
 
< 88H7N    c                 V    ddt          | t          j        d          z            z  z
  S )u>   Return the area under the normal distribution from M{-∞..x}.r   r      )r   r	   sqrtr   s    r   norm_cdfr   @   s'    3q49Q<</00000r   c                     	 t          j        dt          |           z
            S # t          $ r t	          d          cY S w xY w)Nr   -inf)r	   logr   
ValueErrorfloatr   s    r   
norm_logsfr   D   sN    	!8AO,,, 	! 	! 	!==   	!s   #& AAr   c                   (    e Zd ZdddddddZdZdZdS )	LanguageIndependentgׁsF?g{Gz?gbX9ȶ?gI+?))r   r   )r   r   )r   r   )r   r   )r   r   )r   r   r   g333333@N)__name__
__module____qualname__PRIORSAVERAGE_CHARACTERSVARIANCE_CHARACTERS r   r   r   r   N   s>          F r   r   c                    g }t          |          t          |          f}|dk    rt          d |D                       r	 | |         \  }}n&# t          $ r |d         dz
  |d         dz
  f}Y Nw xY wt          |          D ]C}t          |          D ]1}|                    |d         |z
  dz
  |d         |z
  dz
  f           2D|d         |z
  |d         |z
  f}|dk    rt          d |D                       |ddd         S )a  
    Traverse the alignment cost from the tracebacks and retrieves
    appropriate sentence pairs.

    :param backlinks: A dictionary where the key is the alignment points and value is the cost (referencing the LanguageIndependent.PRIORS)
    :type backlinks: dict
    :param source_sents_lens: A list of target sentences' lengths
    :type source_sents_lens: list(int)
    :param target_sents_lens: A list of target sentences' lengths
    :type target_sents_lens: list(int)
    )r   r   c              3   "   K   | ]
}|d k    V  dS )r   Nr%   ).0ps     r   	<genexpr>ztrace.<locals>.<genexpr>n   s&      $>$>Q!V$>$>$>$>$>$>r   r   r   N)lenall	TypeErrorrangeappend)		backlinkssource_sents_lenstarget_sents_lenslinkspositionsr   ijs	            r   tracer9   `   sZ    E%&&,=(>(>?H
f

$>$>X$>$>$>!>!>
	X&DAqq 	 	 	 a!q9HH	 q 	I 	IA1XX I IhqkAo18A;?Q3FGHHHHIQK!OXa[1_5 f

$>$>X$>$>$>!>!>
 2;s   A  A0/A0c                     t           fdt          |d                   D                       }t          fdt          |d                   D                       }	 |||j        z  z   dz  }||j        z  |z
  t          j        ||j        z            z  }	n# t          $ r t          d          cY S w xY wt          t          t          |	                    z   t          j        |j        |                   z    S )aP  Returns the log probability of the two sentences C{source_sents[i]}, C{target_sents[j]}
    being aligned with a specific C{alignment}.

    @param i: The offset of the source sentence.
    @param j: The offset of the target sentence.
    @param source_sents: The list of source sentence lengths.
    @param target_sents: The list of target sentence lengths.
    @param alignment: The alignment type, a tuple of two integers.
    @param params: The sentence alignment parameters.

    @returns: The log probability of a specific alignment between the two sentences, given the parameters.
    c              3   4   K   | ]}|z
  d z
           V  dS r   Nr%   )r(   offsetr7   source_sentss     r   r*   z!align_log_prob.<locals>.<genexpr>   0      MMvl1v:>*MMMMMMr   r   c              3   4   K   | ]}|z
  d z
           V  dS r<   r%   )r(   r=   r8   target_sentss     r   r*   z!align_log_prob.<locals>.<genexpr>   r?   r   r   r   r   )sumr/   r#   r	   r   r$   ZeroDivisionErrorr   LOG2r   r   r   r"   )
r7   r8   r>   rA   	alignmentparamsl_sl_tmdeltas
   ````      r   align_log_probrK   |   s    MMMMMy|9L9LMMM
M
MC
MMMMMy|9L9LMMM
M
MC 3222a7v0036$)**;
 ;
 
    V}} Js5zz***TXfmI6N-O-OOPPs   $:B B;:B;c                    t          |j                                                  }g g}i }t          t	          |           dz             D ]}t          t	          |          dz             D ]}t          d          }d}	|D ]`}
d|
d         z
  }||
d         z
  }|t	          |           k     s|dk     r3||         |         t          ||| ||
|          z   }||k     r|}|
}	a|t          d          k    rd}|	|||f<   |d                             |           t	          |          dk    r|                    d           |                    g            t          || |          S )a  Return the sentence alignment of two text blocks (usually paragraphs).

        >>> align_blocks([5,5,5], [7,7,7])
        [(0, 0), (1, 1), (2, 2)]
        >>> align_blocks([10,5,5], [12,20])
        [(0, 0), (1, 1), (2, 1)]
        >>> align_blocks([12,20], [10,5,5])
        [(0, 0), (1, 1), (1, 2)]
        >>> align_blocks([10,2,10,10,2,10], [12,3,20,3,12])
        [(0, 0), (1, 1), (2, 2), (3, 2), (4, 3), (5, 4)]

    @param source_sents_lens: The list of source sentence lengths.
    @param target_sents_lens: The list of target sentence lengths.
    @param params: the sentence alignment parameters.
    @return: The sentence alignments, a list of index pairs.
    r   infNr+   r   r   )
listr"   keysr/   r,   r   rK   r0   popr9   )r2   r3   rF   alignment_typesDr1   r7   r8   min_dist	min_alignaprev_iprev_jr)   s                 r   align_blocksrX      s   $ 6=--//00O 
AI3())A-..  s,--122 	# 	#AU||HI$ 
" 
"adQqTSVVG##vzzfIf%q+->6) )  x<< H !I5<<'' )Iq!fbELL""""q66A::EE!HHH	-/@AAAr   c                     t          |           t          |          k    rt          d          fdt          | |          D             S )a  Creates the sentence alignment of two texts.

    Texts can consist of several blocks. Block boundaries cannot be crossed by sentence
    alignment links.

    Each block consists of a list that contains the lengths (in characters) of the sentences
    in this block.

    @param source_blocks: The list of blocks in the source text.
    @param target_blocks: The list of blocks in the target text.
    @param params: the sentence alignment parameters.

    @returns: A list of sentence alignment lists
    z>Source and target texts do not have the same number of blocks.c                 8    g | ]\  }}t          ||          S r%   )rX   )r(   source_blocktarget_blockrF   s      r   
<listcomp>zalign_texts.<locals>.<listcomp>   s9       &L, 	\<88  r   )r,   r   zip)source_blockstarget_blocksrF   s     `r   align_textsra      sj     =S////L
 
 	
   *-m]*K*K   r   c              #   X    K    fd}	  |                                            V   )zSplits an iterator C{it} at values of C{split_value}.

    Each instance of C{split_value} is swallowed. The iterator produces
    subiterators which need to be consumed fully before the next subiterator
    can be used.
    c              3   \   K   | }|k    r |V                                   }|k    d S d S Nnext)firstvitsplit_values     r   _chunk_iteratorz!split_at.<locals>._chunk_iterator   sF      ;GGG		A ;r   re   )ri   rj   rk   s   `` r   split_atrl      sN           )obggii((((()r   c                 <    fdt          | |          D             S )zParses a stream of tokens and splits it into sentences (using C{soft_delimiter} tokens)
    and blocks (using C{hard_delimiter} tokens) for use with the L{align_texts} function.
    c                 D    g | ]}d  t          |          D             S )c                 @    g | ]}t          d  |D                       S )c              3   4   K   | ]}t          |          V  d S rd   )r,   )r(   tokens     r   r*   z;parse_token_stream.<locals>.<listcomp>.<listcomp>.<genexpr>  s(      44uE

444444r   )rB   )r(   sentence_its     r   r]   z1parse_token_stream.<locals>.<listcomp>.<listcomp>  s?     	
 	
 	
 4444444	
 	
 	
r   rl   )r(   block_itsoft_delimiters     r   r]   z&parse_token_stream.<locals>.<listcomp>  sL       
 		
 	
'.AA	
 	
 	
  r   rs   )streamru   hard_delimiters    ` r   parse_token_streamrx      s:       
 !88   r   )__doc__r	   r   r   r   scipy.statsImportErrorr   r   r   rD   r   r9   rK   rX   ra   rl   rx   r%   r   r   <module>r|      sV    4!((((((        1! 1! 1!% % %N1 1 1! ! ! ! ![1!h tx{{       $  8Q Q Q8 ?R 3B 3B 3B 3Bl 6I    :) ) )$
 
 
 
 
s    &&