
    +dK                        d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZmZ  ej        d          Zd Z e            Z G d d	e          ZddZ G d de          Z G d de          Z G d de          Z G d de          Z G d de j        j                  ZeeeedZddi fdZ dS )    N)datetime)md5)conf)is_notebookmake_path_posixintakec                 ^   ddl m}  ||           }|d         }|dv rGt          j                            |                     d                    |          d                    } n;|dk    r5t          j                            |           } |                     dd          } t          |           S )	zUtility for cleaning up paths.r   )infer_storage_optionsprotocol)httphttpsz{}:// file:)fsspec.utilsr
   ospathnormpathreplaceformatr   )r   r
   storage_optionr   s       3lib/python3.11/site-packages/intake/source/cache.pysanitize_pathr      s    222222**400Nj)H$$ % wW^^H-E-Er J JKK	V	 %w%%||C$$4       c                   r    e Zd ZdZdZddi fdZd Zd Zd ZddZ	d	 Z
dd
Zd ZddZd Zd Zd Zd ZdS )	BaseCachea  
    Provides utilities for managing cached data files.

    Providers of caching functionality should derive from this, and appear
    as entries in ``registry``. The principle methods to override are
    ``_make_files()`` and ``_load()`` and ``_from_metadata()``.
    i@KL Nc                     || _         || _        t          |pt          d                   }|dk    r&|t	          d          t          j        |d          }|| _        || _        t                      | _
        dS )a  
        Parameters
        ----------
        driver: str
            Name of the plugin that can load catalog entry
        spec: dict
            Specification for caching the data source.
        cache_dir: str or None
            Explicit location of cache root directory
        catdir: str or None
            Directory containing the catalog from which this spec was made
        	cache_dircatdirNz?cache_dir="catdir" only allowed when loadedfrom a catalog file.intake_cache)_driver_specr   r   	TypeError	posixpathjoin
_cache_dir_storage_optionsCacheMetadata	_metadata)selfdriverspecr   r   storage_optionscds          r   __init__zBaseCache.__init__<   s|     
Y;${*;<<> 	8 f deee77B /&r   c                    t           j                            | j                  st          j        | j                   t           j                            | j                  r't          d                    | j                            d S )Nz-Path for cache directory exists as a file: {})r   r   existsr&   makedirsisfile	Exceptionr   r*   s    r   _ensure_cache_dirzBaseCache._ensure_cache_dirU   sr    w~~do.. 	)K(((7>>$/** 	hNUUVZVeffggg	h 	hr   c                     dd l }t          |          }d| j        v r/dt          | j        d                   z  } |j        |d|          }t	          j        | j        ||                    d                    S )Nr   regexz%sr   z/\)rer   r"   subr$   r%   r&   lstrip)r*   cache_subdirurlpathr9   r   r8   s         r   _munge_pathzBaseCache._munge_path[   sw    			W%%dj  	+M$*W*=>>>E26%T**D~do|T[[=O=OPPPr   c           	          t          t          t          j                            |          | j                            dd          | j        f                                                    	                                S )Nr8   r   )
r   strr   r   basenamer"   getr!   encode	hexdigestr*   r=   s     r   _hashzBaseCache._hashf   s[    3((114:>>'23N3NPTP\]^^eegghhrrtttr   c                 X   ||                      |          }|                     ||          }t          j                            |          }t          j                            |          s>|                    d          s)|                    d          st          j        |           |S )Nzhttps://zhttp://)rF   r>   r   r   dirnamer1   
startswithr2   )r*   r=   subdir
cache_pathrH   s        r   _pathzBaseCache._pathi   s     	)ZZ((F%%fg66
'//*--w~~g&& 	%&&z22 %g6H6H6S6S %G$$$r   c                     t          j                                                    ||d}| j                            ||           d S )N)createdoriginal_pathrK   )r   now	isoformatr)   update)r*   r=   rO   rK   metadatas        r   _log_metadatazBaseCache._log_metadatau   sB    '|~~7799Misttgx00000r   c                 Z   t          j        dd          r|gS ||nt          j        dd          | _        |                     |          }|t	          d |D                       r/|                     |          \  }}|                     |||           |                     |          }|S )aW  
        Downloads data from a given url, generates a hashed filename,
        logs metadata, and caches it locally.

        Parameters
        ----------

        urlpath: str, location of data
            May be a local path, or remote path if including a protocol specifier
            such as ``'s3://'``. May include glob wildcards.
        output: bool
            Whether to show progress bars; turn off for testing

        Returns
        -------
        List of local cache_paths to be opened instead of the remote file(s). If
        caching is disable, the urlpath is returned.
        cache_disabledFNcache_download_progressTc              3   V   K   | ]$}t           j                            |           V  %d S N)r   r   r1   ).0cs     r   	<genexpr>z!BaseCache.load.<locals>.<genexpr>   s3      %Q%Q"'..*;*;&;%Q%Q%Q%Q%Q%Qr   )r   rB   output_from_metadataany_make_files_load)r*   r=   r]   kwargscache_pathsfiles_in	files_outs          r   loadzBaseCache.loady   s    & 8$e,, 	9 &affAZ\`8a8a))'22 	5#%Q%Q[%Q%Q%Q"Q"Q 	5"&"2"27";";HiJJxG444))'22r   c                 L    |                      |          }|d |D             S dS )/Return set of local URLs if files already existNc                     g | ]
}|d          S )rK    )rZ   es     r   
<listcomp>z,BaseCache._from_metadata.<locals>.<listcomp>   s    000AlO000r   )get_metadatar*   r=   mds      r   r^   zBaseCache._from_metadata   s;    w'' 	100R0000	1 	1r   Tc           	         ddl }g }g }t          ||          D ]7\  }}	|	j        }
|                    |
           |
|k    r)t          j                            |
          st                              d                    |j                             t                              d                    |                     t                              d                    |
                     |r| 	                    ||j        |
           |
                    t                    }|                     |||	| j        | j                             9 |j        |  |S )zDownload a set of filesr   NCaching file: {}Original path: {}Cached at: {})daskzipr   appendr   r3   loggerdebugr   rT   delayed	_download	blocksizer]   compute)r*   rd   re   r=   metart   outoutnamesfile_infile_outrK   ddowns               r   ra   zBaseCache._load   sJ   !$Xy!9!9 	R 	RGX!JOOJ''' W$ 7>>*-- R/66w|DDEEE077@@AAA_33J??@@@ J&&wjIIIY//

55(DNDKPPQQQcr   c                     t           )z$Make OpenFiles for all input/outputs)NotImplementedErrorr*   r=   rb   s      r   r`   zBaseCache._make_files   s    !!r   c                 6    | j                             |          S )a6  
        Parameters
        ----------

        urlpath: str, location of data
            May be a local path, or remote path if including a protocol specifier
            such as ``'s3://'``. May include glob wildcards.

        Returns
        -------
        Metadata (dict) about a given urlpath.
        )r)   rB   rE   s     r   rm   zBaseCache.get_metadata   s     ~!!'***r   c                    | j                             |g           }|D ]4}	 t          j        |d                    # t          t
          f$ r Y 1w xY w	 t          j                            |d                   }t          j        |           dS # t          t
          f$ r! t          
                    d|z             Y dS w xY w)a  
        Clears cache and metadata for a given urlpath.

        Parameters
        ----------

        urlpath: str, location of data
            May be a local path, or remote path if including a protocol specifier
            such as ``'s3://'``. May include glob wildcards.
        rK   z$Failed to remove cache directory: %sN)r)   popr   removeOSErrorIOErrorr   rH   rmdirrw   rx   )r*   r=   cache_entriescache_entryfns        r   clear_cachezBaseCache.clear_cache   s     **7B77( 	 	K	+l34444W%   	F\!:;;BHRLLLLL! 	F 	F 	FLL?"DEEEEEE	Fs!   <AA9B .C Cc                 x   | j                                         D ]}|                     |           t          j                            | j                  sdS t          j        | j                  D ]}	 t          j	        | j        |          }t          j                            |          rt          j        |           t          j                            |          rt          j        |           # t          t          f$ r1}t                               t%          |                     Y d}~d}~ww xY wdS )z0
        Clears all cache and metadata.
        N)r)   keysr   r   r   isdirr&   listdirr$   r%   shutilrmtreer3   r   r   r   rw   warningr@   )r*   r=   rJ   r   rk   s        r   	clear_allzBaseCache.clear_all   s)    ~**,, 	& 	&GW%%%% w}}T_-- 	Fj11 	' 	'F'^DOV<<7==$$ &M"%%%7>>"%% "IbMMMW% ' ' 's1vv&&&&&&&&'	' 	's   4B C55D7'D22D7rY   T)__name__
__module____qualname____doc__r{   r/   r6   r>   rF   rL   rT   rf   r^   ra   r`   rm   r   r   rj   r   r   r   r   0   s         I,0DRT ) ) ) )2h h h	Q 	Q 	Qu u u
 
 
 
1 1 1   <1 1 1   4" " "+ + +F F F.' ' ' ' 'r   r   Fc           
      
   t          j                    5  t          j        d           |r4	 ddlm} n,# t
          $ r t                              d           d}Y nw xY wt                      r2	 ddl	}n,# t
          $ r t                              d           d}Y nw xY w	 | j
                            | j                  }| }nH# t          $ r;}t                              d                    |                     d}d	}Y d}~nd}~ww xY w|ru|sqt!          d
          D ])}	|	t"          vrt"                              |	           |	}
 n* ||dz  d||
t&          j                            |j                  dd          }nd}t                              d                    | j                             | 5 }|5 }d	}|rW|                    |r|nd          }|                    |           |r%|                    t1          |          dz             |Wddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   |r	 |                    |j        |j        z
             |                                 n4# t8          $ r'}t                              d|z             Y d}~nd}~ww xY wt"                              |
           n# t"                              |
           w xY wddd           dS # 1 swxY w Y   dS )z2Read from input and write to output file in blocksignorer   )tqdmzHCache progress bar requires tqdm to be installed: conda/pip install tqdmFNzbCache progress bar in a notebook requires ipywidgets to be installed: conda/pip install ipywidgetsz%File system error requesting size: {}Td   i   g?z{n}/|/{l_bar})totalleavedisablepositiondescmininterval
bar_formatz
Caching {}ztqdm exception: %s)warningscatch_warningsfilterwarningstqdm.autonotebookr   ImportErrorrw   warnr   
ipywidgetsfssizer   
ValueErrorrx   r   rangedisplayaddr   rA   readwriterR   lenr   ncloser4   r   )r   r   r{   r]   r   r   	file_sizepbar_disablederrir~   pbarf1f2datark   s                   r   rz   rz      s@   		 	"	" :$ :$))) 	2222222   ijjj == 	!!!!!     D  E  E  E	!
55I )MMM 	! 	! 	!LL@GGLLMMMI MMMMMM	!  	  s  A' A t#u,) ))(-88 #/   \((66777 	8 8R 877	#A99rBBDHHTNNN 8CII$6777	  88 8 8 8 8 8 8 8 8 8 8 8 8 8 8	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8  	$$DJ/000

 7 7 71A5666666667 s####s####u:$ :$ :$ :$ :$ :$ :$ :$ :$ :$ :$ :$ :$ :$ :$ :$ :$ :$s  K83K8&AK8AK8.A32K83&BK8BK8 "CK8
D1D>K8DB.K86H89AH!H8!H%%H8(H%)H8,K88H<	<K8?H<	 K86I=<K=
J.J)$K)J..K1K8K((K88K<?K<c                   (     e Zd ZdZd Z fdZ xZS )	FileCachezCache specific set of files

    Input is a single file URL, URL with glob characters or list of URLs. Output
    is a specific set of local files.
    c                 6    ddl m                                   t          |t          t
          f          r)                     d                    |                    n                     |           |dfi  j        } fd|D             }||fS )Nr   
open_filesr   rbc                 n    g | ]1}                      |j                  gd fi j        d         2S wbr   rL   r   r'   rZ   fr   r*   rJ   s     r   rl   z)FileCache._make_files.<locals>.<listcomp>E  J    rrrdeZZAFF!;!; <d\\dF[\\]^_rrrr   )	fsspecr   r6   
isinstancelisttuplerF   r%   r'   )r*   r=   rb   rd   re   r   rJ   s   `    @@r   r`   zFileCache._make_files<  s    %%%%%%   ge}-- 	)ZZ 1 122FFZZ((F:gtEEt/DEErrrrrriqrrr	""r   c                 F    t                                          |          S rY   )superr^   )r*   r=   	__class__s     r   r^   zFileCache._from_metadataH  s    ww%%g...r   )r   r   r   r   r`   r^   __classcell__r   s   @r   r   r   5  sQ         
# 
# 
#/ / / / / / / / /r   r   c                       e Zd ZdZd Zd ZdS )DirCachezCache a complete directory tree

    Input is a directory root URL, plus a ``depth`` parameter for how many
    levels of subdirectories to search. All regular files will be copied. Output
    is the resultant local directory tree.
    c           
      l    ddl m                                                        |           j        d         }g }t          d|dz             D ];}|                     d                    |gdg|z  z                                  < fd|D             }g g }}t          d |D                       }	t          ||          D ]c\  }
}|
j
        |	v r+	 t          j        |j
                   )# t          $ r Y 5w xY w|                    |
           |                    |           d||fS )	Nr   r   depth   /*c                 n    g | ]1}                      |j                  gd fi j        d         2S r   r   r   s     r   rl   z(DirCache._make_files.<locals>.<listcomp>]  r   r   c              3   ^   K   | ](}t           j                            |j                  V  )d S rY   )r   r   rH   rZ   r   s     r   r\   z'DirCache._make_files.<locals>.<genexpr>_  s2      >>BGOOAF++>>>>>>r   )r   r   r6   rF   r"   r   extendr%   setru   r   r   r2   r4   rv   )r*   r=   rb   r   rd   r   re   	files_in2
files_out2pathsfinfoutr   rJ   s   `           @@r   r`   zDirCache._make_filesT  s   %%%%%%   G$$
7#q%!)$$ 	I 	IAOOJJsxx	SEAI0E'F'FGGHHHHrrrrrriqrrr	 "B:	>>X>>>>>Xy11 	( 	(ICx5  (K	****    D   %%%!!$''''*$$s   C77
DDc                 `    |                      |          }||                     |          gS dS )rh   N)rm   rL   rn   s      r   r^   zDirCache._from_metadatak  s<    w'' 	)JJw''((	) 	)r   N)r   r   r   r   r`   r^   rj   r   r   r   r   L  s<         % % %.) ) ) ) )r   r   c                   *     e Zd ZdZ fdZddZ xZS )CompressedCachea;  Cache files extracted from downloaded compressed source

    For one or more remote compressed files, downloads to local temporary dir and
    extracts all contained files to local cache. Input is URL(s) (including
    globs) pointing to remote compressed files, plus optional ``decomp``,
    which is "infer" by default (guess from file extension) or one of the
    key strings in ``intake.source.decompress.decomp``. Optional ``regex_filter``
    parameter is used to load only the extracted files that match the pattern.
    Output is the list of extracted files.
    c                    dd l }|                                ddlm |                                  || _         |dfi | j        }fd|D             }t          t          |           	                    |||d           ||fS )Nr   r   r   c                     g | ]b} t          t          j                            t          j                            |j                                      gd           d         cS r   )r   r   r   r%   rA   )rZ   r   dr   s     r   rl   z/CompressedCache._make_files.<locals>.<listcomp>  s\    yyyklZZaAQAQRSRXAYAY1Z1Z![![ \^bccdefyyyr   F)r}   )
tempfilemkdtempr   r   r6   _urlpathr'   r   r   ra   )	r*   r=   rb   r   rd   re   r   r   r   s	         @@r   r`   zCompressedCache._make_files~  s    %%%%%%   :gtEEt/DEEyyyyypxyyy	ot$$**8Ye*TTT""r   Tc           	         ddl m} |                     |          }	 t          j        |           n# t
          t          f$ r Y nw xY wd |D             }g }t          ||          D ]\  }	}
d| j        v r| j        d         dk    r| j        d         }n|		                    d          rd}n|		                    d          s|		                    d	          rd
}nt|		                    d          s|		                    d          rd}nG|		                    d          rd}n/|		                    d          rd}n|		                    d          rd}||vrt          d|	z             ||         |	|          }t          t          j        | j                            dd                    j        |          }|D ]}t                               d                    |	                     t                               d                    |
j                             t                               d                    |                     |r!|                     | j        |
j        |           |                    |           ǐ|S )Nr   )decompc                     g | ]	}|j         
S rj   )r   r   s     r   rl   z)CompressedCache._load.<locals>.<listcomp>  s    +++A+++r   r   inferz.zipru   z.tar.gzz.tgztgzz.tar.bz2z.tbztbzz.tartarz.gzgzz.bz2bzzUnknown compression for "%s"regex_filterz.*rq   rr   rs   )
decompressr   rL   r   r2   r   r   ru   r"   endswithr   filterr9   compilerB   searchrw   rx   r   r   rT   r   rv   )r*   rd   re   r=   r}   r   rJ   filesr~   r   origr   out2out3r   s                  r   ra   zCompressedCache._load  s   &&&&&&G$$	K! 	 	 	D	+++++5(++ 	 	GAt4:% $*X*>'*I Jx(F## I&& 	!**V*<*< 	J'' 1::f+=+= F## E"" F##  E !?!!CDDD6!9Q''D"*TZ^^ND%I%IJJQSWXXD  /66q99:::077	BBCCC_33B77888 E&&t}diDDD

2 
s   2 AAr   )r   r   r   r   r`   ra   r   r   s   @r   r   r   r  sV        	 	# # # # #% % % % % % % %r   r   c                        e Zd ZdZd ZddZdS )DATCachea  Use the DAT protocol to replicate data

    For details of the protocol, see https://docs.datproject.org/
    The executable ``dat`` must be available.

    Since in this case, it is not possible to access the remote files
    directly, this cache mechanism takes no parameters. The expectation
    is that the url passed by the driver is of the form:

    ::

        dat://<dat hash>/file_pattern

    where the file pattern will typically be a glob string like "\*.json".
    c                 .    |                                   dS )N)NN)r6   r   s      r   r`   zDATCache._make_files  s       zr   Tc                    dd l }ddlm} t          j                            | j        |                     |                    }t          j                            |          \  }}	dd||dg}
	 |	                    |
|j
                   n0# t          t          f$ r t                              d            w xY wt          j                            ||	          }|r* ||          D ] }|                     |||j                   d S d S )Nr   r   datclonez
--no-watch)stdoutzCalling DAT failed)
subprocessr   r   r   r   r%   r&   rF   splitcallPIPEr   r   rw   inforT   )r*   ___r=   r}   r  r   r   r  partcmdnewpathofs                r   ra   zDATCache._load  s)   %%%%%%w||DOTZZ-@-@AAGMM'**	TgsD,7	OOC
O8888! 	 	 	KK,---	 ',,tT** 	> j)) > >""7GRW====	> 	>> >s   -B
 
-B7Nr   )r   r   r   r   r`   ra   rj   r   r   r  r    sA            > > > > > >r   r  c                   V    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 ZddZd ZdS )r(   z_
    Utility class for managing persistent metadata stored in the Intake config directory.
    c                    ddl m} t          j        t	          |j                  d          | _        t          j        	                    | j                  }t          j        
                    |          st          j        |           t          j                            | j                  rHt          | j                  5 }t          j        |          | _        d d d            d S # 1 swxY w Y   d S i | _        d S )Nr   )configzcache_metadata.json)r   r  r$   r%   r   confdirrL   r   r   rH   r1   r2   r3   openjsonrf   r)   )r*   argsrb   r  r   r   s         r   r/   zCacheMetadata.__init__  s   !!!!!!^OFN$C$CEZ[[
GOODJ''w~~a   	KNNN7>>$*%% 	 dj!! .Q!%1. . . . . . . . . . . . . . . . . .  DNNNs   C))C-0C-c                 B    | j         |                     |                   S rY   )r)   __keytransform__r*   keys     r   __getitem__zCacheMetadata.__getitem__  s    ~d33C8899r   c                 h    || j         |                     |          <   |                                  d S rY   r)   r!  _save)r*   r#  values      r   __setitem__zCacheMetadata.__setitem__  s-    5:t,,S112

r   c                 d    | j         |                     |          = |                                  d S rY   r&  r"  s     r   __delitem__zCacheMetadata.__delitem__  s+    N400556

r   c                 *    t          | j                  S rY   )iterr)   r5   s    r   __iter__zCacheMetadata.__iter__  s    DN###r   c                 *    t          | j                  S rY   )r   r)   r5   s    r   __len__zCacheMetadata.__len__  s    4>"""r   c                 h    t          |t          t          f          rd                    |          }|S )Nr   )r   r   r   r%   r"  s     r   r!  zCacheMetadata.__keytransform__   s,    cD%=)) 	 ((3--C
r   c                     |                      |          }| j                            |g           }|                    |           || j        |<   |                                  d S rY   )r!  r)   rB   rv   r'  )r*   r#  r   entriess       r   rR   zCacheMetadata.update  s[    ##C((.$$S"--{###%s

r   c                     t          | j        d          5 }t          j        | j        |           d d d            d S # 1 swxY w Y   d S )Nw)r  rL   r  dumpr)   )r*   r   s     r   r'  zCacheMetadata._save  s    $*c"" 	)aIdna(((	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	)s   >AANc                 d    | j                             ||          }|                                  |S rY   )r)   r   r'  )r*   r#  defaultitems       r   r   zCacheMetadata.pop  s+    ~!!#w//

r   c                 N    t          | j                                                  S rY   )r   r)   r   r5   s    r   r   zCacheMetadata.keys  s    DN''))***r   rY   )r   r   r   r   r/   r$  r)  r+  r.  r0  r!  rR   r'  r   r   rj   r   r   r(   r(     s              : : :    $ $ $# # #  
  ) ) )   
+ + + + +r   r(   )r   dir
compressedr  c           
          |g S g }|D ]p}d|v r$|d         t           vrt          |d                   |                    t                              |d         t                    | ||||                     q|S )a   
    Creates Cache objects from the cache_specs provided in the catalog yaml file

    Parameters
    ----------

    driver: str
        Name of the plugin that can load catalog entry
    specs: list
        Specification for caching the data source.
    Ntype)r   r   r-   )registry
IndexErrorrv   rB   r   )r+   specsr   r   r-   r~   r,   s          r   make_cachesrB    s      	
C M MT> 	+d6l(: 	+T&\***

8<<Vi88f`i  |K  L  L  L  	M  	M  	M  	MJr   )F)!collectionsr  loggingr   r$   r9   r   r   r   hashlibr   intake.configr   intake.utilsr   r   	getLoggerrw   r   r   r   objectr   rz   r   r   r   r  abcMutableMappingr(   r?  rB  rj   r   r   <module>rL     s/         				     				                     5 5 5 5 5 5 5 5		8	$	$! ! !( #%%C' C' C' C' C' C' C' C'L<$ <$ <$ <$~/ / / / /	 / / /.#) #) #) #) #)y #) #) #)L> > > > >i > > >B&> &> &> &> &>y &> &> &>R:+ :+ :+ :+ :+KO2 :+ :+ :+z hoV^__ '+dB      r   