
    Qde+                         d dl Z d dlmZ  G d d          Z G d de          Zd Zd Zd	 Z ed
ddg          Z	 G d d          Z
dS )    N)
namedtuplec                       e Zd ZdZddZed             Zed             Zd Zd Z	 eee	          Z
d Zd	 Zd
 Zd Zd ZdS )AlignedSenta#  
    Return an aligned sentence object, which encapsulates two sentences
    along with an ``Alignment`` between them.

    Typically used in machine translation to represent a sentence and
    its translation.

        >>> from nltk.translate import AlignedSent, Alignment
        >>> algnsent = AlignedSent(['klein', 'ist', 'das', 'Haus'],
        ...     ['the', 'house', 'is', 'small'], Alignment.fromstring('0-3 1-2 2-0 3-1'))
        >>> algnsent.words
        ['klein', 'ist', 'das', 'Haus']
        >>> algnsent.mots
        ['the', 'house', 'is', 'small']
        >>> algnsent.alignment
        Alignment([(0, 3), (1, 2), (2, 0), (3, 1)])
        >>> from nltk.corpus import comtrans
        >>> print(comtrans.aligned_sents()[54])
        <AlignedSent: 'Weshalb also sollten...' -> 'So why should EU arm...'>
        >>> print(comtrans.aligned_sents()[54].alignment)
        0-0 0-1 1-0 2-2 3-4 3-5 4-7 5-8 6-3 7-9 8-9 9-10 9-11 10-12 11-6 12-6 13-13

    :param words: Words in the target language sentence
    :type words: list(str)
    :param mots: Words in the source language sentence
    :type mots: list(str)
    :param alignment: Word-level alignments between ``words`` and ``mots``.
        Each alignment is represented as a 2-tuple (words_index, mots_index).
    :type alignment: Alignment
    Nc                     || _         || _        |t          g           | _        d S t	          |          t          u sJ || _        d S N)_words_mots	Alignment	alignmenttype)selfwordsmotsr   s       2lib/python3.11/site-packages/nltk/translate/api.py__init__zAlignedSent.__init__/   sI    
&r]]DNNN	??i////&DNNN    c                     | j         S r   )r   r   s    r   r   zAlignedSent.words8   s
    {r   c                     | j         S r   )r	   r   s    r   r   zAlignedSent.mots<   s
    zr   c                     | j         S r   )
_alignmentr   s    r   _get_alignmentzAlignedSent._get_alignment@   s
    r   c                 ~    t          t          | j                  t          | j                  |           || _        d S r   )_check_alignmentlenr   r   r   )r   r   s     r   _set_alignmentzAlignedSent._set_alignmentC   s/    TZ#di..)DDD#r   c                     dd                     d | j        D                       z  }dd                     d | j        D                       z  }d| d| d| j        dS )z_
        Return a string representation for this ``AlignedSent``.

        :rtype: str
        z[%s]z, c              3       K   | ]	}d |z  V  
dS z'%s'N .0ws     r   	<genexpr>z'AlignedSent.__repr__.<locals>.<genexpr>O   s&      #D#D1FQJ#D#D#D#D#D#Dr   c              3       K   | ]	}d |z  V  
dS r   r    r!   s     r   r$   z'AlignedSent.__repr__.<locals>.<genexpr>P   s&      "B"B!6A:"B"B"B"B"B"Br   zAlignedSent())joinr   r	   r   )r   r   r   s      r   __repr__zAlignedSent.__repr__I   su     $))#D#D#D#D#DDDE"B"Btz"B"B"BBBCCeCCtCCtCCCCr   c                    d}|dz  }| j         D ]}|d| d| dz  }| j        D ]}|d| d| dz  }| j        D ]'\  }}|d| j         |          d| j        |          dz  }(t          t	          | j                   d	z
            D ]4}|d
                    | j         |         | j         |d	z                      z  }5t          t	          | j                  d	z
            D ]4}|d                    | j        |         | j        |d	z                      z  }5|dd                    d | j         D                       z  z  }|dd                    d | j        D                       z  z  }|dz  }|S )z<
        Dot representation of the aligned sentence
        zgraph align {
znode[shape=plaintext]
"z_source" [label="z"] 
z_target" [label="z_source" -- "z
_target" 
   z)"{}_source" -- "{}_source" [style=invis]
z)"{}_target" -- "{}_target" [style=invis]
z{rank = same; %s}
 c              3       K   | ]	}d |z  V  
dS )z"%s_source"Nr    r!   s     r   r$   z&AlignedSent._to_dot.<locals>.<genexpr>u   s'      .V.VQ}q/@.V.V.V.V.V.Vr   c              3       K   | ]	}d |z  V  
dS )z"%s_target"Nr    r!   s     r   r$   z&AlignedSent._to_dot.<locals>.<genexpr>v   s'      .U.UQ}q/@.U.U.U.U.U.Ur   })r   r	   r   ranger   formatr'   )r   sr#   uvis         r   _to_dotzAlignedSent._to_dotT   s    	&&  	2 	2A1Q111111AA 	2 	2A1Q111111AA O 	M 	MDAqLT[^LL$*Q-LLLLAA s4;''!+,, 	 	A=DDAAE"  AA s4:*++ 	 	A=DD
1
1q5!  AA 	
"chh.V.V$+.V.V.V&V&VWW	"chh.U.U$*.U.U.U&U&UVV	Sr   c                 l   |                                                      d          }d}	 t          j        dd|z  gt          j        t          j        t          j                  }n"# t
          $ r}t          d          |d}~ww xY w|                    |          \  }}|                    d          S )zR
        Ipython magic : show SVG representation of this ``AlignedSent``.
        utf8svgdotz-T%s)stdinstdoutstderrz0Cannot find the dot binary from Graphviz packageN)	r6   encode
subprocessPopenPIPEOSError	Exceptioncommunicatedecode)r   
dot_stringoutput_formatprocesseouterrs          r   
_repr_svg_zAlignedSent._repr_svg_|   s     \\^^**622
	W &./ o!!	  GG  	W 	W 	WNOOUVV	W&&z22Szz&!!!s   ;A' '
B1BBc                     d                     | j                  dd         dz   }d                     | j                  dd         dz   }d| d| dS )zn
        Return a human-readable string representation for this ``AlignedSent``.

        :rtype: str
        r,   N   z...z<AlignedSent: 'z' -> 'z'>)r'   r   r	   )r   sourcetargets      r   __str__zAlignedSent.__str__   s_     $+&&ss+e3$*%%crc*U2999v9999r   c                 f    t          | j        | j        | j                                                  S )zm
        Return the aligned sentence pair, reversing the directionality

        :rtype: AlignedSent
        )r   r	   r   r   invertr   s    r   rS   zAlignedSent.invert   s(     4:t{DO4J4J4L4LMMMr   r   )__name__
__module____qualname____doc__r   propertyr   r   r   r   r   r(   r6   rL   rQ   rS   r    r   r   r   r      s         >' ' ' '   X   X  $ $ $ 88I	D 	D 	D& & &P" " "&: : :N N N N Nr   r   c                   T    e Zd ZdZd Zed             Zd Zd ZddZ	d Z
d	 Zd
 ZdS )r
   ac  
    A storage class for representing alignment between two sequences, s1, s2.
    In general, an alignment is a set of tuples of the form (i, j, ...)
    representing an alignment between the i-th element of s1 and the
    j-th element of s2.  Tuples are extensible (they might contain
    additional data, such as a boolean to indicate sure vs possible alignments).

        >>> from nltk.translate import Alignment
        >>> a = Alignment([(0, 0), (0, 1), (1, 2), (2, 2)])
        >>> a.invert()
        Alignment([(0, 0), (1, 0), (2, 1), (2, 2)])
        >>> print(a.invert())
        0-0 1-0 2-1 2-2
        >>> a[0]
        [(0, 1), (0, 0)]
        >>> a.invert()[2]
        [(2, 1), (2, 2)]
        >>> b = Alignment([(0, 0), (0, 1)])
        >>> b.issubset(a)
        True
        >>> c = Alignment.fromstring('0-0 0-1')
        >>> b == c
        True
    c                     t                               | |          }|t          g           k    rt          d |D                       nd|_        d |_        |S )Nc              3   &   K   | ]}|d          V  dS r   Nr    r"   ps     r   r$   z$Alignment.__new__.<locals>.<genexpr>   s&      ++!++++++r   r   )	frozenset__new__max_len_index)clspairsr   s      r   r`   zAlignment.__new__   sV      e,,/3y}}/D/DC++d++++++!	r   c                 X    t          d |                                D                       S )a  
        Read a giza-formatted string and return an Alignment object.

            >>> Alignment.fromstring('0-0 2-1 9-2 21-3 10-4 7-5')
            Alignment([(0, 0), (2, 1), (7, 5), (9, 2), (10, 4), (21, 3)])

        :type s: str
        :param s: the positional alignments in giza format
        :rtype: Alignment
        :return: An Alignment object corresponding to the string representation ``s``.
        c                 ,    g | ]}t          |          S r    )
_giza2pair)r"   as     r   
<listcomp>z(Alignment.fromstring.<locals>.<listcomp>   s    ;;;A*Q--;;;r   )r
   split)rd   r2   s     r   
fromstringzAlignment.fromstring   s)     ;;;;;<<<r   c                 l    | j         s|                                  | j                             |          S )zN
        Look up the alignments that map from a given index or slice.
        )rc   _build_index__getitem__)r   keys     r   ro   zAlignment.__getitem__   s6     { 	 {&&s+++r   c                 4    t          d | D                       S )zI
        Return an Alignment object, being the inverted mapping.
        c              3   L   K   | ]}|d          |d         f|dd         z   V   dS )r+   r      Nr    r]   s     r   r$   z#Alignment.invert.<locals>.<genexpr>   s;      >>A1Q41,122.>>>>>>r   )r
   r   s    r   rS   zAlignment.invert   s!     >>>>>>>>r   Nc                 0   t                      }| j        s|                                  |s.t          t	          t          | j                                      }|D ],}|                    d | j        |         D                        -t          |          S )z
        Work out the range of the mapping from the given positions.
        If no positions are specified, compute the range of the entire mapping.
        c              3       K   | ]	\  }}|V  
d S r   r    )r"   _fs      r   r$   z"Alignment.range.<locals>.<genexpr>   s&      66tq!666666r   )setrc   rn   listr0   r   updatesorted)r   	positionsimager^   s       r   r0   zAlignment.range   s    
 { 	  	6U3t{#3#34455I 	7 	7ALL66t{1~6666666e}}r   c                 &    dt          |           z  S )M
        Produce a Giza-formatted string representing the alignment.
        zAlignment(%r))r{   r   s    r   r(   zAlignment.__repr__   s     --r   c                 Z    d                     d t          |           D                       S )r   r,   c              3   0   K   | ]}d |dd         z  V  dS )z%d-%dNrs   r    r]   s     r   r$   z$Alignment.__str__.<locals>.<genexpr>   s.      >>A!BQB%>>>>>>r   )r'   r{   r   s    r   rQ   zAlignment.__str__   s+     xx>>>>>>>>r   c                     d t          | j        dz             D             | _        | D ](}| j        |d                                      |           )dS )z
        Build a list self._index such that self._index[i] is a list
        of the alignments originating from word i.
        c                     g | ]}g S r    r    )r"   rv   s     r   rj   z*Alignment._build_index.<locals>.<listcomp>   s    888ar888r   r+   r   N)r0   rb   rc   append)r   r^   s     r   rn   zAlignment._build_index   sb    
 985Q#7#7888 	( 	(AK!$$Q''''	( 	(r   r   )rT   rU   rV   rW   r`   classmethodrl   ro   rS   r0   r(   rQ   rn   r    r   r   r
   r
      s         2   = = [=, , ,? ? ?   . . .? ? ?( ( ( ( (r   r
   c                 n    |                      d          \  }}t          |          t          |          fS N-rk   int)pair_stringr5   js      r   rh   rh     s/    S!!DAqq663q66>r   c                 p    |                      d          \  }}}t          |          t          |          fS r   r   )r   r5   r   r^   s       r   _naacl2pairr   	  s1    $$GAq!q663q66>r   c                      t          |          t          u sJ t           fd|D                       st          d          t          fd|D                       st          d          dS )ab  
    Check whether the alignments are legal.

    :param num_words: the number of source language words
    :type num_words: int
    :param num_mots: the number of target language words
    :type num_mots: int
    :param alignment: alignment to be checked
    :type alignment: Alignment
    :raise IndexError: if alignment falls outside the sentence
    c              3   D   K   | ]}d |d          cxk    ok     nc V  dS r\   r    )r"   pair	num_wordss     r   r$   z#_check_alignment.<locals>.<genexpr>  sC      >>DqDG''''i''''>>>>>>r   z&Alignment is outside boundary of wordsc              3   X   K   | ]$}|d          du pd|d          cxk    ok     nc V  %dS )r+   Nr   r    )r"   r   num_motss     r   r$   z#_check_alignment.<locals>.<genexpr>  sP      PPdtAw$9!tAw"9"9"9"9"9"9"9"9PPPPPPr   z%Alignment is outside boundary of motsN)r   r
   all
IndexError)r   r   r   s   `` r   r   r     s     	??i''''>>>>I>>>>> CABBBPPPPiPPPPP B@AAAB Br   PhraseTableEntry
trg_phraselog_probc                   *    e Zd ZdZd Zd Zd Zd ZdS )PhraseTablezs
    In-memory store of translations for a given phrase, and the log
    probability of the those translations
    c                 ,    t                      | _        d S r   )dictsrc_phrasesr   s    r   r   zPhraseTable.__init__,  s    66r   c                     | j         |         S )a  
        Get the translations for a source language phrase

        :param src_phrase: Source language phrase of interest
        :type src_phrase: tuple(str)

        :return: A list of target language phrases that are translations
            of ``src_phrase``, ordered in decreasing order of
            likelihood. Each list element is a tuple of the target
            phrase and its log probability.
        :rtype: list(PhraseTableEntry)
        r   r   
src_phrases     r   translations_forzPhraseTable.translations_for/  s     
++r   c                     t          ||          }|| j        vr
g | j        |<   | j        |                             |           | j        |                             d d           dS )z
        :type src_phrase: tuple(str)
        :type trg_phrase: tuple(str)

        :param log_prob: Log probability that given ``src_phrase``,
            ``trg_phrase`` is its translation
        :type log_prob: float
        )r   r   c                     | j         S r   )r   )rI   s    r   <lambda>z!PhraseTable.add.<locals>.<lambda>K  s    
 r   T)rp   reverseN)r   r   r   sort)r   r   r   r   entrys        r   addzPhraseTable.add>  sx     !JJJJT---+-DZ($++E222$)).B.BD)QQQQQr   c                     || j         v S r   r   r   s     r   __contains__zPhraseTable.__contains__M  s    T---r   N)rT   rU   rV   rW   r   r   r   r   r    r   r   r   r   &  s]         
" " ", , ,R R R. . . . .r   r   )r?   collectionsr   r   r_   r
   rh   r   r   r   r   r    r   r   <module>r      s       " " " " " "PN PN PN PN PN PN PN PNf_( _( _( _( _(	 _( _( _(D  
  
B B B* :0<2LMM (. (. (. (. (. (. (. (. (. (.r   