U
    ]ß<`½  ã                   @   sD   d Z ddlZG dd„ dƒZdd„ Zddd„Zdd	d
„Zddd„ZdS )aµ  Code for doing k-nearest-neighbors classification.

k Nearest Neighbors is a supervised learning algorithm that classifies
a new observation based the classes in its surrounding neighborhood.

Glossary:
 - distance   The distance between two points in the feature space.
 - weight     The importance given to each point for classification.

Classes:
 - kNN           Holds information for a nearest neighbors classifier.


Functions:
 - train        Train a new kNN classifier.
 - calculate    Calculate the probabilities of each class, given an observation.
 - classify     Classify an observation into a class.

Weighting Functions:
 - equal_weight    Every example is given a weight of 1.

é    Nc                   @   s   e Zd ZdZdd„ ZdS )ÚkNNa  Holds information necessary to do nearest neighbors classification.

    Attribues:
     - classes  Set of the possible classes.
     - xs       List of the neighbors.
     - ys       List of the classes that the neighbors belong to.
     - k        Number of neighbors to look at.
    c                 C   s   t ƒ | _g | _g | _d| _dS )zInitialize.N)ÚsetÚclassesÚxsÚysÚk)Úself© r	   ú&lib/python3.8/site-packages/Bio/kNN.pyÚ__init__,   s    zkNN.__init__N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r	   r	   r	   r
   r   "   s   	r   c                 C   s   dS )z8Return integer one (dummy method for equally weighting).é   r	   )ÚxÚyr	   r	   r
   Úequal_weight4   s    r   c                 C   s.   t ƒ }t|ƒ|_t | |¡|_||_||_|S )a(  Train a k nearest neighbors classifier on a training set.

    xs is a list of observations and ys is a list of the class assignments.
    Thus, xs and ys should contain the same number of elements.  k is
    the number of neighbors that should be examined when doing the
    classification.
    )r   r   r   ÚnumpyÚasarrayr   r   r   )r   r   r   ÚtypecodeÚknnr	   r	   r
   Útrain:   s    
r   c                 C   s  |dkrt }t |¡}g }|rRtt| jƒƒD ]"}||| j| ƒ}| ||f¡ q,nXt t|ƒ¡}tt| jƒƒD ]:}|| j|  |dd…< t t 	||¡¡}| ||f¡ qn| 
¡  i }| jD ]}	d||	< q¼|d| j… D ].\}}| j| }
||
 ||| j| ƒ ||
< qØ|S )aÎ  Calculate the probability for each class.

    Arguments:
     - x is the observed data.
     - weight_fn is an optional function that takes x and a training
       example, and returns a weight.
     - distance_fn is an optional function that takes two points and
       returns the distance between them.  If distance_fn is None (the
       default), the Euclidean distance is used.

    Returns a dictionary of the class to the weight given to the class.
    Ng        )r   r   r   ÚrangeÚlenr   ÚappendZzerosZsqrtÚdotÚsortr   r   r   )r   r   Ú	weight_fnÚdistance_fnÚorderÚiZdistZtempÚweightsr   Úklassr	   r	   r
   Ú	calculateJ   s*    



r$   c           	      C   sR   |dkrt }t| |||d}d}d}| ¡ D ] \}}|dksD||kr,|}|}q,|S )a%  Classify an observation into a class.

    If not specified, weight_fn will give all neighbors equal weight.
    distance_fn is an optional function that takes two points and returns
    the distance between them.  If distance_fn is None (the default),
    the Euclidean distance is used.
    N)r   r   )r   r$   Úitems)	r   r   r   r   r"   Z
most_classZmost_weightr#   Zweightr	   r	   r
   Úclassifyw   s    r&   )N)NN)NN)r   r   r   r   r   r$   r&   r	   r	   r	   r
   Ú<module>   s   

-