
    H&h0                         d dl Z d dlZd dlZd dlmZ d dlZd dlZd dlZd dl	Z	d dl
mZ d dl d	dZd	dZd
dZedk(  rd dlZ ej$                          yy)    N)etree)Refactor)*c                 h   t        |dt        j                  j                         j                         fd}|;g }t	        t        |             D ]   } || |   d|      }|j                  |       " |S g }t	        t        |            D ]   } |d||   |      }|j                  |       " |S )aI	  
    Search Catalogue of Life for for direct children of a particular taxon.

    :param name: The string to search for. Only exact matches found the name given
        will be returned, unless one or wildcards are included in the search
        string. An * (asterisk) character denotes a wildcard; a % (percentage)
        character may also be used. The name must be at least 3 characters long,
        not counting wildcard characters.
    :param id: The record ID of the specific record to return (only for scientific
        names of species or infraspecific taxa)
    :param format: format of the results returned. Valid values are format=xml and
        format=php; if the format parameter is omitted, the results are returned
        in the default XML format. If format=php then results are returned as a
        PHP array in serialized string format, which can be converted back to an
        array in PHP using the unserialize command
    :param start: The first record to return. If omitted, the results are returned
        from the first record (start=0). This is useful if the total number of
        results is larger than the maximum number of results returned by a single
        Web service query (currently the maximum number of results returned by a
        single query is 500 for terse queries and 50 for full queries).
    :param checklist: The year of the checklist to query, if you want a specific
        year's checklist instead of the lastest as default (numeric). Valid years
        are 2010 through the previous year from the current date. If none given,
        the "lastest" checklist is used
    
    You must provide one of name or id. The other parameters (format and start) are
    optional. Returns A list of data.frame's.

    Usage::

        import pytaxize
        pytaxize.col_children(name=["Apis"])

        # An example where there is no classification, results in data.frame with no rows
        pytaxize.col_children(id=["4fdb38d6220462049eab9e3f285144e0"])

        # Use a specific year's checklist
        pytaxize.col_children(name=["Apis"], checklist="2012")
        pytaxize.col_children(name=["Apis"], checklist="2009")

        # Pass in many names or many id's
        out = pytaxize.col_children(name=["Buteo","Apis","Accipiter"], checklist="2012")
        # get just one element in list of output
        out[0]
    i  c                    d}|nCt        |      }|dv rt        j                  dd|z   |      }nd}t        j                  d||      }| |dd}|j                         D ci c]  \  }}|	|| }}}t	        ||d	
      j                         }|j                  d      }t        |      dk(  rt        j                  d       g }	t        t        |            D ]V  }
||
   j                         }|	j                  t        t        g d|d d D  cg c]  } | j                   c}                    X |	S c c}}w c c} w )N.https://www.catalogueoflife.org/col/webservice201220112010colannual-checklist/@https://www.catalogueoflife.org/annual-checklist/year/webserviceyearfull)nameidformatresponsestartgetrequest//child_taxa//taxonr   z Please enter a valid search namer   r   rank   )strresubitemsr   xmlxpathlensysexitrangegetchildrenappenddictziptext)xy	checklisturlpayloadkvtt	childtaxaoutlistitt_r   r   s               [/mounts/lovelace/software/anaconda3/envs/py312/lib/python3.12/site-packages/pytaxize/col.pyfunczcol_children.<locals>.func=   s:   >II22ffU$7)$CSIXffVY4!fQVW$+MMOEDAqq}1a4EEc7E2668HH23	y>QHH78s9~& 	AA,**,CNNS-BQ/H1/HIJ	
  F 0Is   %
D90D9D?N)assert_range_numericdatetimenowr   r'   r$   r)   )	r   r   r   r   r/   r:   tempr7   sss	     ``     r9   col_childrenr@      s    ^ D(*;*;*?*?*A*F*FG6 
zs4y! 	Ad1gtY/BKKO	 s2w 	AdBqE9-BKKO	     c                    
 d
d
fd}t        | t              rt               }|j                  |        |} g }t	        t        |             D ]"  } || |   ||||      }	|j                  |	       $ |S )a  
    :param name: The string to search for. Only exact matches found the name given
        will be returned, unless one or wildcards are included in the search
        string. An * (asterisk) character denotes a wildcard; a % (percentage)
        character may also be used. The name must be at least 3 characters long,
        not counting wildcard characters.
    :param downto: The taxonomic level you want to go down to. See examples below.
        The taxonomic level IS case sensitive, and you do have to spell it
        correctly. See rank_ref for spelling.
    :param checklist: The year of the checklist to query, if you want a specific
        year's checklist instead of the lastest as default (numeric).
    :param format: The returned format (default = None). If NULL xml is used.
        Currently only xml is supported.
    :param start:  The first record to return (default = None). If NULL, the
       results are returned from the first record (start=0). This is useful if
       the total number of results is larger than the maximum number of results
       returned by a single Web service query (currently the maximum number of
       results returned by a single query is 500 for terse queries and 50 for
       full queries).

    Returns a list of Pandas DataFrame's.

    Usage::
        
        import pytaxize

        pytaxize.col_downstream(name="Apis", downto="Species")
        pytaxize.col_downstream(name="Insecta", downto="Order")

        # An example that takes a bit longer
        pytaxize.col_downstream(name=["Insecta","Animalia"], downto="Class")

        # Using a checklist from a specific year
        pytaxize.col_downstream(name="Apis", downto="Species", checklist=2011)
    r   r   c                 .   |}nAt        |      }|dv rt        j                  dd|z         }nt        j                  d|      }t        j                  dd      }t        j                  |      }|j                  D cg c]  }| }	}g }
t        t        |	            D ]  }||	|   v }|
j                  |        |j                  t        j                  |
dg            }||j                  |j                  d	k(     j                  d
   |j                   d
    }|j                  D cg c]  }|j#                  d      d
    }}| }d}t        j                  dg      }g }d
}|dk(  r|dz  }fd} |||      }||d   D cg c]  }| c}v r$|j                  |j                  |d   |k(            |j                  |d   |k7     j                   d
   d
kD  rI|d   D cg c]  }| }}g }t        t        |            D ]  }|j                  ||   |v         ||   }n3t%               }|j                  |       t        j                  |dg      }t'        |d   |k(        rd}n|d   }d}|dk(  r|S c c}w c c}w c c}w c c}w )Nr	   r   r   r   pytaxizezdata/rank_ref.csvmatchcolumnsTr   ,notrankName   c                    | dd}|j                         D ci c]  \  }}|	|| }}}t        ||d      j                         }|j                  d      }g }t	        t        |            D ]A  }||   j                         }	|j                  |	d d D  cg c]  } | j                   c}        C t        j                  |g d      }
|
S c c}}w c c} w )	Nr   )r   r   r   r   r   r   r   r   r   rF   )r!   r   r"   r#   r'   r$   r(   r)   r,   pd	DataFrame)r-   r0   r1   r2   r3   r4   r5   r6   r7   r8   dfr   r   s              r9   	searchcolz/col_downstream.<locals>.func.<locals>.searchcol   s    #$ &8,3MMOMDAqq}1a4MMc7E:>>@HH%:;	s9~. >A#A,224CNNCG#<qAFF#<=> \\'3GH	 N $=s   
CCC
r   famr   )r   r   r    pkg_resourcesresource_filenamerM   read_csvranksr'   r$   r)   joinrN   locrE   indexshapesplitlistall)r   downtor   r   r/   r0   rank_ref_pathdatr-   stuffthingsr7   r?   dat2subsettoranktogetstop_notoutoutiterrP   r4   shbbvalscol_urlyear_urls     ``                      r9   r:   zcol_downstream.<locals>.func   s   CII22ffU$7)$CWMffVY9%77
DWXkk-(II&q&&s5z" 	A58#BMM"	 xxVgY?@dhhtzzT1288;TZZ]K+1<<8a!''#,q/88zl3unAID 5#&BRZ0!00

266"V*"678vvbjF*+11!4q8!#F,Aa,,s2w -A))BqEVO,-BvF#dVH=6&>V+,vI unL 
i ' 92 1 -s   	J/J	J*	J)
isinstancer   r[   r)   r'   r$   )r   r]   r   r   r/   r:   nametmpr>   r7   r4   rm   rn   s             @@r9   col_downstreamrq   e   s    H ?GQHAF $&tD3t9 $q'665)<B KrA   c                    fd}|9g }t        t        |             D ]  }|j                   || |   d               |S t        |      }g }t        t        |            D ]  }|j                   |d||                  |S )aw  
    Search Catalogue of Life for taxonomic IDs

    :param name: The string to search for. Only exact matches found the name given
       will be returned, unless one or wildcards are included in the search
       string. An * (asterisk) character denotes a wildcard; a % (percentage)
       character may also be used. The name must be at least 3 characters long,
       not counting wildcard characters.
    :param id: The record ID of the specific record to return (only for scientific
         names of species or infraspecific taxa)
    :param start: The first record to return. If omitted, the results are returned
         from the first record (start=0). This is useful if the total number of
         results is larger than the maximum number of results returned by a single
         Web service query (currently the maximum number of results returned by a
         single query is 500 for terse queries and 50 for full queries).
    :param checklist: The year of the checklist to query, if you want a specific
         year's checklist instead of the lastest as default (numeric).

    You must provide one of name or id. The other parameters (format and start)
    are optional.

    Usage::

        import pytaxize

        pytaxize.col_search(name=["Apis"])
        pytaxize.col_search(id=15669061)

        # Many names
        pytaxize.col_search(name=["Apis","Puma concolor"])

        # Many ids - DOESNT WORK
        pytaxize.col_search(id=[15669061,6862841])

        # An example where there is no data
        pytaxize.col_search(id=11935941)

        # Example with more than 1 result
        pytaxize.col_search(name=['Poa'])
    c                    d}n8dv rt        j                  ddz   |      }nd}t        j                  d|      }| |d}t        ||d	      j                         }|j	                  d
      }g }t        t        |            D ]r  }||   }i }	t        t        |            D ]A  }
||
   j                         D ])  }|	j                  |j                  |j                  i       + C |j                  |	       t |S )Nr   r	   r   r   r   r   )r   r   r   r   r   z//result)r   r    r   r"   r#   r'   r$   ri   updatetagr,   r)   )r-   r.   r0   r1   r4   r`   r6   r7   r8   eachger/   r   s               r9   r:   zcol_search.<locals>.func  s   >22ffU$7)$CSIXffVY4A6c7E2668$s5z" 	!A(CD3s8_ 1Q 1AKK011 NN4 	! rA   N)r.   )r-   r.   )r'   r$   r)   r   )r   r   r   r/   r:   r>   r7   s     ``   r9   
col_searchry      s    T0 
zs4y! 	/AKKT!W-.	/ K	 Ws2w 	/AKKtr!u-.	/KrA   __main__)NNNNN)NNNN)r%   requestsr<   lxmlr   pandasrM   r   jsonrR   pytaxize.refactorr   pytaxize.utilsr@   rq   ry   __name__doctesttestmod rA   r9   <module>r      sV    
     	   & WrqfKx zGOO rA   