
    H&h@                     ,   d dl Z d dlZd dlZd dlmZ d dlZd dlZd dlmZ d dl	m
Z
 d dlmZ d dlZ	 d dlZ G d de      Zdd	Zdd
ZddZ	 	 	 	 	 	 	 	 	 	 ddZedk(  rd dlZ ej4                          yy# e$ r  ej"                  d       dZY \w xY w)    N)etree)resource_filename)Refactor)_dfz1Pandas library not installed, dataframes disabledc                       e Zd Zy)NoResultExceptionN)__name__
__module____qualname__     [/mounts/lovelace/software/anaconda3/envs/py312/lib/python3.12/site-packages/pytaxize/tax.pyr   r      s    r   r   c                     | dk(  rt        |d|      S | dk(  rt        |d|      S | dk(  rt        |d|      S | dk(  rt        |d|      S y	)
aU  
    Get a random vector of species names.

    :param rank: Taxonomic rank, one of species, genus (default), family, order.
    :param size: Number of names to get. Maximum depends on the rank.
    :param as_dataframe: (optional) Type: boolean. Return as pandas data frame?
      default: False

    Usage::

        import pytaxize
        pytaxize.names_list(size=10)
        pytaxize.names_list('species', size=10)
        pytaxize.names_list('family', size=10)
        pytaxize.names_list('order', size=10)
        pytaxize.names_list('order', 2)
        pytaxize.names_list('order', 15)
    specieszdata/plantNames.csvgenuszdata/plantGenusNames.csvfamilyzdata/apg_families.csvorderzdata/apg_orders.csvz4rank must be one of species, genus, family, or order)names_list_helper)ranksizeas_dataframes      r   
names_listr      se    & y '<lKKw 'A<PPx '>MMw '<lKKEr   c                 l   t        t        |      }|r+t        j                  |      }|d   d |  j	                         S t        |d      5 }t        j                  |      }t        |       g }|D ]  }|j                  |        	 d d d        D cg c]  }|d   	 c}d |  S # 1 sw Y   xY wc c}w )Nnames )newliner   )
r   r	   pdread_csvtolistopencsvreadernextappend)	r   pathr   pnpathdatfr"   rowws	            r   r   r   6   s    x.Fkk&!7|ET"))++&"% 	 ZZ]FLC  

3 		  ""!"5D))	  	  #s   ;B%B1%B.c                 h   |dk(  rd}nd}t        |       dkD  rVdj                  |       }d|i}|dk(  rt        ||d      j                         }|S t        ||d      j	                         }|S d| i}|dk(  rt        ||d	      j                         }|S t        ||d	      j	                         }|S )
a  
    Search the CANADENSYS Vascan API.

    :param q: Taxonomic rank, one of species, genus (default), family, order.
    :param format: Number of names to get. Maximum depends on the rank.
    :param raw: Raw data or not (default)
    :param callopts: Further args passed to request

    Usage::

        import pytaxize
        pytaxize.vascan_search(q = ["Helianthus annuus"])
        pytaxize.vascan_search(q = ["Helianthus annuus"], raw=True)
        pytaxize.vascan_search(q = ["Helianthus annuus", "Crataegus dodgei"], raw=True)

        # format type
        ## json
        pytaxize.vascan_search(q = ["Helianthus annuus"], format="json", raw=True)

        ## xml
        pytaxize.vascan_search(q = ["Helianthus annuus"], format="xml", raw=True)

        # lots of names, in this case 50
        splist = pytaxize.names_list(rank='species', size=50)
        pytaxize.vascan_search(q = splist)
    jsonz5http://data.canadensys.net/vascan/api/0.1/search.jsonz4http://data.canadensys.net/vascan/api/0.1/search.xml   
qpost)requestget)lenjoinr   r,   raw)r/   formatr5   urlquerypayloadouts          r   vascan_searchr;   E   s    6 ED
1vz		!,V38==?C 
 38<<>C
(V37<<>C 
 37;;=C
r   c
           	         | ||d}
|
j                         D ci c]  \  }}|dk7  s|| }
}}t        |
      dkD  rt        j                  d       d}| |||||||d}|j                         D ci c]  \  }}|dk7  s|| }}}t	        j
                  ||      }|j                          |j                         }|d   }|}|j                  d       |	rt        |d	      }||d
S c c}}w c c}}w )a  
  Resolve names using Global Names Recognition and Discovery.

  Uses the Global Names Recognition and Discovery service, see
  http://gnrd.globalnames.org/.

  :param url: An encoded URL for a web page, PDF, Microsoft Office document, or
    image file, see examples
  :param file: When using multipart/form-data as the content-type, a file may be sent.
    This should be a path to your file on your machine.
  :param text: Type: string. Text content; best used with a POST request, see
    examples
  :param engine: (optional) Type: integer, Default: 0. Either 1 for TaxonFinder,
    2 for NetiNeti, or 0 for both. If absent, both engines are used.
  :param unique: (optional) Type: boolean. If True (default),
    response has unique names without offsets.
  :param verbatim: (optional) Type: boolean, If True (default to False),
    response excludes verbatim strings.
  :param detect_language: (optional) Type: boolean, When
    True (default), NetiNeti is not used if the language of incoming text is
    determined not to be English. When 'false', NetiNeti will be used if requested.
  :param all_data_sources: (optional) Type: bolean. Resolve found
    names against all available Data Sources.
  :param data_source_ids: (optional) Type: string. Pipe separated list of data
    source ids to resolve found names against. See list of Data Sources.
  :param as_dataframe: (optional) Type: boolean. Return as pandas data frame?
    default: False

  Usage::
  
      import pytaxize

      # Get data from a website using its URL
      out = pytaxize.scrapenames(url = 'https://en.wikipedia.org/wiki/Spider')
      out['data'].head() # data
      out['meta'] # metadata

      # Scrape names from a pdf at a URL
      out = pytaxize.scrapenames(url = 'http://www.mapress.com/zootaxa/2012/f/z03372p265f.pdf')
      out['data'].head() # data
      out['meta'] # metadata

      # With arguments
      pytaxize.scrapenames(url = 'http://www.mapress.com/zootaxa/2012/f/z03372p265f.pdf', unique=True)
      pytaxize.scrapenames(url = 'http://www.mapress.com/zootaxa/2012/f/z03372p265f.pdf', all_data_sources=True)

      # Get data from text string as an R object
      pytaxize.scrapenames(text='A spider named Pardosa moesta Banks, 1892')
  )r7   filetextNr-   z*Only one of url, file, or text can be usedz,http://gnrd.globalnames.org/name_finder.json)r7   r>   engineuniqueverbatimdetect_languageall_data_sourcesdata_source_ids)paramsr   T)metadata)
itemsr3   sysexitrequestsr2   raise_for_statusr,   popr   )r7   r=   r>   r?   r@   rA   rB   rC   rD   r   methodkeyvaluebaser9   r:   resrG   rF   s                      r   scrapenamesrS   v   s   z $5F+1<<>KZS%Ud]c5jKFK
6{Q=>9D*,*	G -4MMOMjc5u}sEzMGM
,,tG
,C
((*Cw<DDHHW4$''1 L Ns   C+C+3C1C1__main__)r   
   F)F)r,   F)
NNNNNNNNNF)rI   warningsrK   lxmlr   rer,   pkg_resourcesr   pytaxize.refactorr   pytaxize.itis.itisr   r!   pandasr   ImportErrorwarn	Exceptionr   r   r   r;   rS   r	   doctesttestmodr   r   r   <module>rb      s    
    	  + & " 
		 	F>*.d 			V(J zGOO [  HMMEF	Bs   A7 7BB