
    QdP                         d Z ddlmZ ddlmZ ddlmZ ddlmZ d Z	 G d d          Z
 G d	 d
          Z G d d          ZdS )a  
Common methods and classes for all IBM models. See ``IBMModel1``,
``IBMModel2``, ``IBMModel3``, ``IBMModel4``, and ``IBMModel5``
for specific implementations.

The IBM models are a series of generative models that learn lexical
translation probabilities, p(target language word|source language word),
given a sentence-aligned parallel corpus.

The models increase in sophistication from model 1 to 5. Typically, the
output of lower models is used to seed the higher models. All models
use the Expectation-Maximization (EM) algorithm to learn various
probability tables.

Words in a sentence are one-indexed. The first word of a sentence has
position 1, not 0. Index 0 is reserved in the source sentence for the
NULL token. The concept of position does not apply to NULL, but it is
indexed at 0 by convention.

Each target word is aligned to exactly one source word or the NULL
token.

References:
Philipp Koehn. 2010. Statistical Machine Translation.
Cambridge University Press, New York.

Peter E Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and
Robert L. Mercer. 1993. The Mathematics of Statistical Machine
Translation: Parameter Estimation. Computational Linguistics, 19 (2),
263-311.
    )insort_leftdefaultdict)deepcopy)ceilc                 \    d}| D ]&}t          |j                  }t          ||          }'|S )z
    :param sentence_aligned_corpus: Parallel corpus under consideration
    :type sentence_aligned_corpus: list(AlignedSent)
    :return: Number of words in the longest target language sentence
        of ``sentence_aligned_corpus``
    r   )lenwordsmax)sentence_aligned_corpusmax_maligned_sentencems       8lib/python3.11/site-packages/nltk/translate/ibm_model.pylongest_target_sentence_lengthr   .   s>     E3   &''AuL    c                   j    e Zd ZdZdZd Zd Zd Zd Zd Z	dd
Z
ddZddZd Zd Zd Zd Zd ZdS )IBMModelz0
    Abstract base class for all IBM models
    g-q=c                 X    |                      |           |                                  d S N)
init_vocabreset_probabilitiesselfr   s     r   __init__zIBMModel.__init__H   s-    /000  """""r   c                      t          d            _        	 t          d            _        	 t           fd           _        	 d _        d S )Nc                  "    t          d           S )Nc                      t           j        S r   r   MIN_PROB r   r   <lambda>z@IBMModel.reset_probabilities.<locals>.<lambda>.<locals>.<lambda>N   s	    (9 r   r   r!   r   r   r"   z.IBMModel.reset_probabilities.<locals>.<lambda>N   s    K 9 9:: r   c                  "    t          d           S )Nc                  "    t          d           S )Nc                  "    t          d           S )Nc                      t           j        S r   r   r!   r   r   r"   zdIBMModel.reset_probabilities.<locals>.<lambda>.<locals>.<lambda>.<locals>.<lambda>.<locals>.<lambda>W   s	    @Q r   r   r!   r   r   r"   zRIBMModel.reset_probabilities.<locals>.<lambda>.<locals>.<lambda>.<locals>.<lambda>W   s    K8Q8Q,R,R r   r   r!   r   r   r"   z@IBMModel.reset_probabilities.<locals>.<lambda>.<locals>.<lambda>W   s    $R$RSS r   r   r!   r   r   r"   z.IBMModel.reset_probabilities.<locals>.<lambda>V   s    KSS  r   c                  (    t           fd          S )Nc                       j         S r   )r    r   s   r   r"   z@IBMModel.reset_probabilities.<locals>.<lambda>.<locals>.<lambda>`   s	    t} r   r   r)   s   r   r"   z.IBMModel.reset_probabilities.<locals>.<lambda>`   s    ;?T?T?T?T3U3U r   g      ?)r   translation_tablealignment_tablefertility_tablep1r)   s   `r   r   zIBMModel.reset_probabilitiesL   sw    !,::"
 "
	
  +  
  

	  ++U+U+U+UVV	 	 	r   c                     dS )z
        Initialize probability tables to a uniform distribution

        Derived classes should implement this accordingly.
        Nr!   r   s     r   set_uniform_probabilitiesz"IBMModel.set_uniform_probabilitiesn   s	     	r   c                     t                      }t                      }|D ]6}|                    |j                   |                    |j                   7|                    d            || _        	 || _        d S r   )setupdater
   motsadd	src_vocab	trg_vocab)r   r   r5   r6   r   s        r   r   zIBMModel.init_vocabv   s    EE	EE	 7 	4 	4-3444-23333d"	 #	 	r   c                 J   t                      }t          |j                  }t          |j                  }|                     |          }|                     |          }|                    |                     |                     |}t          d|dz             D ]}t          d|dz             D ]l}	|                     |||	          }|                     ||          }|                     ||          }
|                    |
           |j	        |j	        k    r|}m||fS )a  
        Sample the most probable alignments from the entire alignment
        space

        First, determine the best alignment according to IBM Model 2.
        With this initial alignment, use hill climbing to determine the
        best alignment according to a higher IBM Model. Add this
        alignment and its neighbors to the sample set. Repeat this
        process with other initial alignments obtained by pegging an
        alignment point.

        Hill climbing may be stuck in a local maxima, hence the pegging
        and trying out of different alignments.

        :param sentence_pair: Source and target language sentence pair
            to generate a sample of alignments from
        :type sentence_pair: AlignedSent

        :return: A set of best alignments represented by their ``AlignmentInfo``
            and the best alignment of the set for convenience
        :rtype: set(AlignmentInfo), AlignmentInfo
           r   )
r1   r	   r3   r
   best_model2_alignment	hillclimbr2   neighboringrangescore)r   sentence_pairsampled_alignmentslr   initial_alignmentpotential_alignmentbest_alignmentji	neighborss              r   samplezIBMModel.sample   s8   . !UU"###$$ !66}EE"nn->??!!$"2"23F"G"GHHH, q!a% 	9 	9A1a!e__ 9 9$($>$>}aQR$S$S!&*nn5F&J&J# ,,-@!DD	")))444&,~/CCC%8N9 ">11r   Nr   c                    dg|j         z   }dg|j        z   }t          |          dz
  }t          |          dz
  }dg|dz   z  }d t          |dz             D             }	t          d|dz             D ]}
|
|k    r|}nqd}t          j        }||
         }t          d|dz             D ]G}||         }| j        |         |         | j        |         |
         |         |         z  }||k    r|}|}H|||
<   |	|                             |
           t          t          |          t          |          t          |          |	          S )aT  
        Finds the best alignment according to IBM Model 2

        Used as a starting point for hill climbing in Models 3 and
        above, because it is easier to compute than the best alignments
        in higher models

        :param sentence_pair: Source and target language sentence pair
            to be word-aligned
        :type sentence_pair: AlignedSent

        :param j_pegged: If specified, the alignment point of j_pegged
            will be fixed to i_pegged
        :type j_pegged: int

        :param i_pegged: Alignment point to j_pegged
        :type i_pegged: int
        NUNUSEDr8   r   c                     g | ]}g S r!   r!   ).0rE   s     r   
<listcomp>z2IBMModel.best_model2_alignment.<locals>.<listcomp>   s    ******r   )r3   r
   r	   r<   r   r    r*   r+   appendAlignmentInfotuple)r   r>   j_peggedi_peggedsrc_sentencetrg_sentencer@   r   	alignmentceptsrD   best_imax_alignment_probtrE   salignment_probs                    r   r9   zIBMModel.best_model2_alignment   s|   & v 22 zM$77!!C1q5M	**U1q5\\***q!a% 	$ 	$AH}}!%-%6" Oq!a% # #A$QA.q1!4t7KA7Nq7QRS7TUV7WW # &);;;-;*!"!IaL&M  ####)eL1153F3F
 
 	
r   c                     |}|                      |          }	 |}|                     ||          D ]!}|                      |          }||k    r|}|}"||k    rnB||_        |S )a,  
        Starting from the alignment in ``alignment_info``, look at
        neighboring alignments iteratively for the best one

        There is no guarantee that the best alignment in the alignment
        space will be found, because the algorithm might be stuck in a
        local maximum.

        :param j_pegged: If specified, the search will be constrained to
            alignments where ``j_pegged`` remains unchanged
        :type j_pegged: int

        :return: The best alignment found from hill climbing
        :rtype: AlignmentInfo
        )prob_t_a_given_sr;   r=   )r   alignment_inforP   rT   max_probabilityold_alignmentneighbor_alignmentneighbor_probabilitys           r   r:   zIBMModel.hillclimb   s      #	//	::	%M&*&6&6y(&K&K ; ;"'+'<'<=O'P'P$'/99 2I&:OM))	 *	r   c                 b   t                      }t          |j                  dz
  }t          |j                  dz
  }|j        }|j        }t          d|dz             D ]}||k    rt          d|dz             D ]}	t          |          }
t          |          }||         }|	|
|<   t          ||	         |           ||         
                    |           t          t          |
          |j        |j        |          }|                    |           t          d|dz             D ]}||k    rt          d|dz             D ]}||k    r||k    rt          |          }
t          |          }||         }||         }	||
|<   |	|
|<   ||         
                    |           t          ||         |           ||	         
                    |           t          ||	         |           t          t          |
          |j        |j        |          }|                    |           |S )a  
        Determine the neighbors of ``alignment_info``, obtained by
        moving or swapping one alignment point

        :param j_pegged: If specified, neighbors that have a different
            alignment point from j_pegged will not be considered
        :type j_pegged: int

        :return: A set neighboring alignments represented by their
            ``AlignmentInfo``
        :rtype: set(AlignmentInfo)
        r8   r   )r1   r	   rR   rS   rT   rU   r<   listr   r   removerN   rO   r4   )r   r]   rP   rF   r@   r   original_alignmentoriginal_ceptsrD   rE   new_alignment	new_ceptsold_inew_alignment_infoother_jother_is                   r   r;   zIBMModel.neighboring  sd    EE	+,,q0+,,q0+5'-q!a% 	6 	6AH}}q!a% 6 6A$();$<$<M ( 8 8I.q1E ()M!$  	!a000e$++A...)6m,,&3&3!	* *& MM"45555q!a% 	: 	:AH}}$QA : :G(**w!||(,-?(@(@$,^$<$<	"4W"=.q1 ,3a(12g. "'*11':::#Ig$6:::!!++A...#IaL':::-:!-00*7*7%	. .* "&8999r   c                     |j                                         D ]S\  }}|D ]K}|j         |         |         |j        |         z  }t          |t          j                  | j        |         |<   LTd S r   )	t_given_sitemsany_t_given_sr   r   r    r*   )r   countsrX   	src_wordsrY   estimates         r   *maximize_lexical_translation_probabilitiesz3IBMModel.maximize_lexical_translation_probabilitiesY  s    ",2244 	P 	PLAy P P!+A.q1F4H4KK/28X=N/O/O&q)!,,P	P 	Pr   c                     |j                                         D ]S\  }}|D ]K}|j         |         |         |j        |         z  }t          |t          j                  | j        |         |<   LTd S r   )	fertilityro   fertility_for_any_phir   r   r    r,   )r   rq   phirr   rY   rs   s         r    maximize_fertility_probabilitiesz)IBMModel.maximize_fertility_probabilities_  s    $.4466 	P 	PNC P P!+C03f6RST6UU/28X=N/O/O$S)!,,P	P 	Pr   c                     |j         |j         |j        z   z  }t          |t          j                  }t          |dt          j        z
            | _         d S )Nr8   )r-   p0r   r   r    min)r   rq   p1_estimates      r   &maximize_null_generation_probabilitiesz/IBMModel.maximize_null_generation_probabilitiese  sH    i69vy#89+x'899 k1x'8#899r   c                 D    d}|D ]}||                      |          z  }|S Nr   )r\   )r   
alignmentsprobabilityr]   s       r   prob_of_alignmentszIBMModel.prob_of_alignmentsl  s8    ( 	A 	AN400@@@KKr   c                     dS )z
        Probability of target sentence and an alignment given the
        source sentence

        All required information is assumed to be in ``alignment_info``
        and self.

        Derived classes should override this method
                r!   )r   r]   s     r   r\   zIBMModel.prob_t_a_given_sr  s	     sr   r   r   )__name__
__module____qualname____doc__r    r   r   r/   r   rG   r9   r:   r;   rt   ry   r~   r   r\   r!   r   r   r   r   <   s         H# # #     D    &,2 ,2 ,2\4
 4
 4
 4
l! ! ! !FG G G GRP P PP P P: : :  
 
 
 
 
r   r   c                   N    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd ZdS )rN   a  
    Helper data object for training IBM Models 3 and up

    Read-only. For a source sentence and its counterpart in the target
    language, this class holds information about the sentence pair's
    alignment, cepts, and fertility.

    Warning: Alignments are one-indexed here, in contrast to
    nltk.translate.Alignment and AlignedSent, which are zero-indexed
    This class is not meant to be used outside of IBM models.
    c                     t          |t                    st          d          || _        	 || _        	 || _        	 || _        	 d | _        d S )Nz\The alignment must be a tuple because it is used to uniquely identify AlignmentInfo objects.)
isinstancerO   	TypeErrorrT   rR   rS   rU   r=   )r   rT   rR   rS   rU   s        r   r   zAlignmentInfo.__init__  sx    )U++ 	>  
 #	 )	
 )	 
	 
	 	r   c                 6    t          | j        |                   S )zL
        Fertility of word in position ``i`` of the source sentence
        )r	   rU   )r   rE   s     r   fertility_of_izAlignmentInfo.fertility_of_i  s     4:a=!!!r   c                 J    | j         |         }| j        |         d         |k    S )zo
        :return: Whether the word in position ``j`` of the target
            sentence is a head word
        r   )rT   rU   )r   rD   rE   s      r   is_head_wordzAlignmentInfo.is_head_word  s&    
 N1z!}Q1$$r   c                     |dS t          | j        |                   t          | j        |                   z  }t          t	          |                    S )z
        :return: The ceiling of the average positions of the words in
            the tablet of cept ``i``, or 0 if ``i`` is None
        Nr   )sumrU   r	   intr   )r   rE   average_positions      r   center_of_ceptzAlignmentInfo.center_of_cept  sL    
 91tz!}--DJqM0B0BB4())***r   c                     | j         |         }|dk    rt          d          |dz
  }|dk    r=|                     |          dk    r$|dz  }|dk    r|                     |          dk    $|dk    rd}|S )zm
        :return: The previous cept of ``j``, or None if ``j`` belongs to
            the first cept
        r   zNWords aligned to NULL cannot have a previous cept because NULL has no positionr8   N)rT   
ValueErrorr   )r   rD   rE   previous_cepts       r   r   zAlignmentInfo.previous_cept  s    
 N1664   AaD$7$7$F$F!$K$KQM aD$7$7$F$F!$K$K A Mr   c                     | j         |         }| j        |                             |          }|dk    rdS | j        |         |dz
           S )z
        :return: The position of the previous word that is in the same
            tablet as ``j``, or None if ``j`` is the first word of the
            tablet
        r   Nr8   )rT   rU   index)r   rD   rE   tablet_positions       r   previous_in_tabletz AlignmentInfo.previous_in_tablet  sO     N1*Q---a00a4z!}_q011r   c                     g }t          dt          | j                            D ]4}| j        |         dz
  }|dk     rd}|                    |dz
  |f           5|S )z
        :return: Zero-indexed alignment, suitable for use in external
            ``nltk.translate`` modules like ``nltk.translate.Alignment``
        :rtype: list(tuple)
        r8   r   N)r<   r	   rS   rT   rM   )r   zero_indexed_alignmentrD   rE   s       r   r   z$AlignmentInfo.zero_indexed_alignment  sr     "$q#d/0011 	6 	6Aq!A%A1uu"))1q5!*5555%%r   c                 "    | j         |j         k    S r   )rT   r   others     r   __eq__zAlignmentInfo.__eq__  s    ~00r   c                     | |k     S r   r!   r   s     r   __ne__zAlignmentInfo.__ne__  s    5=  r   c                 *    t          | j                  S r   )hashrT   r)   s    r   __hash__zAlignmentInfo.__hash__  s    DN###r   N)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r!   r   r   rN   rN     s        
 
( ( (T" " "% % %	+ 	+ 	+  &
2 
2 
2& & &1 1 1! ! !$ $ $ $ $r   rN   c                   *    e Zd ZdZd Zd Zd Zd ZdS )CountszK
    Data object to store counts of various parameters during training
    c                     t          d           | _        t          d           | _        d| _        d| _        t          d           | _        t          d           | _        d S )Nc                  "    t          d           S )Nc                      dS Nr   r!   r!   r   r   r"   z3Counts.__init__.<locals>.<lambda>.<locals>.<lambda>       r   r   r!   r   r   r"   z!Counts.__init__.<locals>.<lambda>      [-E-E r   c                      dS r   r!   r!   r   r   r"   z!Counts.__init__.<locals>.<lambda>  s     r   r   c                  "    t          d           S )Nc                      dS r   r!   r!   r   r   r"   z3Counts.__init__.<locals>.<lambda>.<locals>.<lambda>  r   r   r   r!   r   r   r"   z!Counts.__init__.<locals>.<lambda>  r   r   c                      dS r   r!   r!   r   r   r"   z!Counts.__init__.<locals>.<lambda>  s     r   )r   rn   rp   r{   r-   rv   rw   r)   s    r   r   zCounts.__init__  s`    $%E%EFF(55$%E%EFF%0%=%="""r   c                     |j         |         }|j        |         }|j        |         }| j        |         |xx         |z  cc<   | j        |xx         |z  cc<   d S r   )rT   rS   rR   rn   rp   )r   countr]   rD   rE   rX   rY   s          r   update_lexical_translationz!Counts.update_lexical_translation  sq    $Q''*'*q!%1&r   c                     t          |j                  dz
  }|                    d          }| xj        ||z  z  c_        | xj        |d|z  z
  |z  z  c_        d S )Nr8   r      )r	   rS   r   r-   r{   )r   r   r]   r   fertility_of_nulls        r   update_null_generationzCounts.update_null_generation  se    +,,q0*99!<<$u,,A---66r   c                     t          dt          |j                            D ]T}|j        |         }|                    |          }| j        |         |xx         |z  cc<   | j        |xx         |z  cc<   Ud S r   )r<   r	   rR   r   rv   rw   )r   r   r]   rE   rY   rx   s         r   update_fertilityzCounts.update_fertility   s    q#n9::;; 	3 	3A+A.A //22CN3"""e+"""&q)))U2))))		3 	3r   N)r   r   r   r   r   r   r   r   r!   r   r   r   r     sZ         > > >' ' '7 7 73 3 3 3 3r   r   N)r   bisectr   collectionsr   copyr   mathr   r   r   rN   r   r!   r   r   <module>r      s    @       # # # # # #              @ @ @ @ @ @ @ @F
D$ D$ D$ D$ D$ D$ D$ D$N3 3 3 3 3 3 3 3 3 3r   