
    +gd                         d dl Zd ZddZdS )    Nc                 :   || z  |                                  z  }t          j        |          }t          ||                                 z
            }|dk    r||z
  }t          j        t          j        |                    ddd         }|D ]n}t          j        ||k              \  }	t          t          |	          |          }
|	                    |	|
d          }	||	xx         dz  cc<   ||
z  }|dk    r no|
                    t          j                  S )a  Computes approximate mode of multivariate hypergeometric.
    This is an approximation to the mode of the multivariate
    hypergeometric given by class_counts and n_draws.
    It shouldn't be off by more than one.
    It is the mostly likely outcome of drawing n_draws many
    samples from the population given by class_counts.
    Args
    ----------
    class_counts : ndarray of int
        Population per class.
    n_draws : int
        Number of draws (samples to draw) from the overall population.
    rng : random state
        Used to break ties.
    Returns
    -------
    sampled_classes : ndarray of int
        Number of samples drawn from each class.
        np.sum(sampled_classes) == n_draws

    r   NF)sizereplace   )sumnpfloorintsortuniquewhereminlenchoiceastypeint64)class_countsn_drawsrng
continuousflooredneed_to_add	remaindervaluesvalueindsadd_nows              7lib/python3.11/site-packages/datasets/utils/stratify.pyapproximate_moder       s   0 <',*:*:*<*<<Jhz""G g-..KQ(	9--..ttt4  	 	EhyE122GT
 #d))[11G::d%:@@DDMMMQMMM7"Ka  >>"(###    
   c           	   #     K   t          j        | d          \  }}|j        d         }t          j        |          }t          j        |          dk     rt          d          ||k     rt          d||fz            ||k     rt          d||fz            t          j        t          j        |d	          t          j        |          d
d                   }	t          |          D ]}
t          |||          }||z
  }t          |||          }g }g }t          |          D ]}|                    ||                   }|	|                             |d          }|                    |d
||                             |                    |||         ||         ||         z                       |                    |          }|                    |          }||fV  d
S )a  

    Provides train/test indices to split data in train/test sets.
    It's reference is taken from StratifiedShuffleSplit implementation
    of scikit-learn library.

    Args
    ----------

    n_train : int,
        represents the absolute number of train samples.

    n_test : int,
        represents the absolute number of test samples.

    random_state : int or RandomState instance, default=None
        Controls the randomness of the training and testing indices produced.
        Pass an int for reproducible output across multiple function calls.

    n_splits : int, default=10
        Number of re-shuffling & splitting iterations.
    T)return_inverser      zMinimum class count errorzLThe train_size = %d should be greater or equal to the number of classes = %dzKThe test_size = %d should be greater or equal to the number of classes = %d	mergesort)kindNr   clip)mode)r	   r   shapebincountr   
ValueErrorsplitargsortcumsumranger    permutationtakeextend)yn_trainn_testr   n_splitsclasses	y_indices	n_classesr   class_indices_n_iclass_counts_remainingt_itraintestir1   perm_indices_class_is                      r   )stratified_shuffle_split_generate_indicesrD   6   s     . 1T:::GYa I;y))L	vla4555]ahjs`tt
 
 	
 	\`fhq_rr
 
 	
 HRZ	DDDbiP\F]F]^a_a^aFbccM8__  |Wc::!-!35vsCCy!! 	H 	HA//,q/::K#0#3#8#86#8#R#R LL-hAh7888KK,SVc!fs1vo-EFGGGG&&t$$Tk! r!   )r"   )numpyr	   r    rD    r!   r   <module>rG      s?       /$ /$ /$d5 5 5 5 5 5r!   