
    +gd                         d Z ddlZddlmZ deeeef         defdZ G d de          Z	 G d	 d
e          Z
 G d d          ZdS )a  
Hashing function for dataset keys using `hashlib.md5`

Requirements for the hash function:

- Provides a uniformly distributed hash from random space
- Adequately fast speed
- Working with multiple input types (in this case, `str`, `int` or `bytes`)
- Should be platform independent (generates same hash on different OS and systems)

The hashing function provides a unique 128-bit integer hash of the key provided.

The split name is being used here as the hash salt to avoid having same hashes
in different splits due to same keys
    N)Union	hash_datareturnc                    t          | t                    r| S t          | t                    r|                     dd          } n4t          | t                    rt          |           } nt          |           |                     d          S )z|
    Returns the input hash_data in its bytes form

    Args:
    hash_data: the hash salt/key to be converted to bytes
    \/zutf-8)
isinstancebytesstrreplaceintInvalidKeyErrorencode)r   s    0lib/python3.11/site-packages/datasets/keyhash.py	_as_bytesr   %   s     )U## )	Is	#	# ) %%dC00			Is	#	# )	NN		 i(((G$$$    c                   "     e Zd ZdZ fdZ xZS )r   z6Raises an error when given key is of invalid datatype.c                     d| _         d| dt          |           | _        d| _        t	                                          | j          | j         | j                    d S )Nz7
FAILURE TO GENERATE DATASET: Invalid key type detectedz
Found Key z	 of type z-
Keys should be either str, int or bytes type)prefixtypeerr_msgsuffixsuper__init__)selfr   	__class__s     r   r   zInvalidKeyError.__init__?   sb    PKiKK$y//KKFDKDDt{DDEEEEEr   __name__
__module____qualname____doc__r   __classcell__r   s   @r   r   r   <   sG        @@F F F F F F F F Fr   r   c                   $     e Zd ZdZd fd	Z xZS )DuplicatedKeysErrorz(Raise an error when duplicate key found. c                    || _         || _        || _        d| _        t	          |          dk    r!dd                    |           d| | _        n;dd                    |d d                    dt	          |          dz
   d| | _        |rd|z   nd	| _        t                      	                    | j         | j         | j                    d S )
Nz3Found multiple examples generated with the same key   z
The examples at index z, z have the key z... (z more) have the key 
r&   )
keyduplicate_key_indicesfix_msgr   lenjoinr   r   r   r   )r   r*   r+   r,   r   s       r   r   zDuplicatedKeysError.__init__I   s   %:"K$%%++kdii@U6V6VkkfikkDLL ]dii@UVYWYVY@Z6[6[  ]  ]bef{b|b|  @B  cB  ]  ]  X[  ]  ]DL(/7dWnnRDKDDt{DDEEEEEr   )r&   r   r#   s   @r   r%   r%   F   sM        22
F 
F 
F 
F 
F 
F 
F 
F 
F 
Fr   r%   c                   @    e Zd ZdZdefdZdeeeef         defdZ	dS )	KeyHasherz,KeyHasher class for providing hash using md5	hash_saltc                 R    t          j        t          |                    | _        d S )N)hashlibmd5r   
_split_md5)r   r1   s     r   r   zKeyHasher.__init__Y   s    !+i	&:&:;;r   r*   r   c                     | j                                         }t          |          }|                    |           t	          |                                d          S )zReturns 128-bits unique hash of input key

        Args:
        key: the input key to be hashed (should be str, int or bytes)

        Returns: 128-bit int hash key   )r5   copyr   updater   	hexdigest)r   r*   r4   byte_keys       r   hashzKeyHasher.hash\   sL     o""$$S>>

83==??B'''r   N)
r   r   r    r!   r   r   r   r   r
   r<    r   r   r0   r0   V   sb        66<# < < < <(c3o. (3 ( ( ( ( ( (r   r0   )r!   r3   typingr   r   r   r
   r   	Exceptionr   r%   r0   r=   r   r   <module>r@      s   "         %sC/ %E % % % %.F F F F Fi F F FF F F F F) F F F ( ( ( ( ( ( ( ( ( (r   