
    Qdf                         d dl mZ d ZdS )    )defaultdictc                 r   	
 d |                                 D             }d |                                 D             }g d
t          |                              t          |                    	t          |                              t          |                    t	          t                    	D ];\  }}d                             |           d                             |           <	
 fd}	 fd} |              ||            ||           t          	          S )u"  
    This module symmetrisatizes the source-to-target and target-to-source
    word alignment output and produces, aka. GDFA algorithm (Koehn, 2005).

    Step 1: Find the intersection of the bidirectional alignment.

    Step 2: Search for additional neighbor alignment points to be added, given
            these criteria: (i) neighbor alignments points are not in the
            intersection and (ii) neighbor alignments are in the union.

    Step 3: Add all other alignment points that are not in the intersection, not in
            the neighboring alignments that met the criteria but in the original
            forward/backward alignment outputs.

        >>> forw = ('0-0 2-1 9-2 21-3 10-4 7-5 11-6 9-7 12-8 1-9 3-10 '
        ...         '4-11 17-12 17-13 25-14 13-15 24-16 11-17 28-18')
        >>> back = ('0-0 1-9 2-9 3-10 4-11 5-12 6-6 7-5 8-6 9-7 10-4 '
        ...         '11-6 12-8 13-12 15-12 17-13 18-13 19-12 20-13 '
        ...         '21-3 22-12 23-14 24-17 25-15 26-17 27-18 28-18')
        >>> srctext = ("この よう な ハロー 白色 わい 星 の Ｌ 関数 "
        ...            "は Ｌ と 共 に 不連続 に 増加 する こと が "
        ...            "期待 さ れる こと を 示し た 。")
        >>> trgtext = ("Therefore , we expect that the luminosity function "
        ...            "of such halo white dwarfs increases discontinuously "
        ...            "with the luminosity .")
        >>> srclen = len(srctext.split())
        >>> trglen = len(trgtext.split())
        >>>
        >>> gdfa = grow_diag_final_and(srclen, trglen, forw, back)
        >>> gdfa == sorted(set([(28, 18), (6, 6), (24, 17), (2, 1), (15, 12), (13, 12),
        ...         (2, 9), (3, 10), (26, 17), (25, 15), (8, 6), (9, 7), (20,
        ...         13), (18, 13), (0, 0), (10, 4), (13, 15), (23, 14), (7, 5),
        ...         (25, 14), (1, 9), (17, 13), (4, 11), (11, 17), (9, 2), (22,
        ...         12), (27, 18), (24, 16), (21, 3), (19, 12), (17, 12), (5,
        ...         12), (11, 6), (12, 8)]))
        True

    References:
    Koehn, P., A. Axelrod, A. Birch, C. Callison, M. Osborne, and D. Talbot.
    2005. Edinburgh System Description for the 2005 IWSLT Speech
    Translation Evaluation. In MT Eval Workshop.

    :type srclen: int
    :param srclen: the number of tokens in the source language
    :type trglen: int
    :param trglen: the number of tokens in the target language
    :type e2f: str
    :param e2f: the forward word alignment outputs from source-to-target
                language (in pharaoh output format)
    :type f2e: str
    :param f2e: the backward word alignment outputs from target-to-source
                language (in pharaoh output format)
    :rtype: set(tuple(int))
    :return: the symmetrized alignment points from the GDFA algorithm
    c           
      x    g | ]7}t          t          t          |                    d                               8S -tuplemapintsplit.0as     3lib/python3.11/site-packages/nltk/translate/gdfa.py
<listcomp>z'grow_diag_final_and.<locals>.<listcomp>E   4    
>
>
>Q5S!''#,,''((
>
>
>    c           
      x    g | ]7}t          t          t          |                    d                               8S r   r   r   s     r   r   z'grow_diag_final_and.<locals>.<listcomp>F   r   r   ))r   )r   r   )   r   )r   r   )r   r   )r   r   )r   r   )r   r   efc            
         t                    dz
  } | t                    k     rd}t          
          D ]}t                    D ]}||fv r	D ]}t          d t          ||f|          D                       }|\  }}|vrZ|vrV|v rR                    |           d                             |           d                             |           | dz  } d}|rdS | t                    k     dS dS )zz
        Search for the neighbor points and them to the intersected alignment
        points if criteria are met.
        r   Tc              3   &   K   | ]\  }}||z   V  d S )N )r   ijs      r   	<genexpr>z9grow_diag_final_and.<locals>.grow_diag.<locals>.<genexpr>c   s*      ,U,Utq!QU,U,U,U,U,U,Ur   r   r   FN)lenranger	   zipadd)prev_lenno_new_pointsr   r   neighbore_newf_newaligned	alignment	neighborssrclentrglenunions          r   	grow_diagz&grow_diag_final_and.<locals>.grow_diagR   sM   
 y>>A%Y'' M6]] 6 6v 6 6A1v**(1 6 6H',,U,UsAq68?T?T,U,U,U'U'UH+3LE5 !&W 4 4g9M9M"*e"3"3 )h 7 7 7 ' 0 0 7 7 7 ' 0 0 7 7 7 (A05!6$  1 Y''''''r   c                    t                    D ]o}t                    D ]]}|vrW|vrS||fv rM                    ||f           d                             |           d                             |           ^pdS )z
        Adds remaining points that are not in the intersection, not in the
        neighboring alignments but in the original *e2f* and *f2e* alignments
        r   r   N)r    r"   )r   r&   r'   r(   r)   r+   r,   r-   s      r   	final_andz&grow_diag_final_and.<locals>.final_ands   s     6]] 	, 	,Ev 
, 
, ((W,,%//MM5%.111CL$$U+++CL$$U+++
,	, 	,r   )r   setintersectionr-   r   r"   sorted)r+   r,   e2ff2er   r   r.   r0   r(   r)   r*   r-   s   ``      @@@@r   grow_diag_final_andr6      sr   t ?
>#))++
>
>
>C
>
>#))++
>
>
>CVVVIC%%c#hh//IHHNN3s88$$E #G  1         B, , , , , , , , ,( IKKKIcNNNIcNNN)r   N)collectionsr   r6   r   r   r   <module>r8      s7    $ # # # # #    r   