
from __future__ import annotations

import warnings

import numpy as np
import pandas as pd

from dask.base import compute as dask_compute
from dask.dataframe import methods
from dask.dataframe.io.io import from_delayed, from_pandas
from dask.dataframe.utils import pyarrow_strings_enabled
from dask.delayed import delayed, tokenize
from dask.utils import parse_bytes


def read_sql_query(
    sql,
    con,
    index_col,
    divisions=None,
    npartitions=None,
    limits=None,
    bytes_per_chunk="256 MiB",
    head_rows=5,
    meta=None,
    engine_kwargs=None,
    **kwargs,
):
    """
    Read SQL query into a DataFrame.

    If neither ``divisions`` nor ``npartitions`` is given, the memory footprint of the
    first few rows will be determined, and partitions of size ~256 MiB will
    be used.

    Parameters
    ----------
    sql : SQLAlchemy Selectable
        SQL query to be executed. ``TextClause`` is not supported.
    con : str
        Full sqlalchemy URI for the database connection
    index_col : str
        Column which becomes the index, and defines the partitioning. Should
        be an indexed column in the SQL server, and of any orderable type. If the
        type is numeric or datetime, then partition boundaries can be inferred from
        ``npartitions`` or ``bytes_per_chunk``; otherwise you must supply explicit
        ``divisions``.
    divisions : sequence
        Values of the index column to split the table by. If given, this will
        override ``npartitions`` and ``bytes_per_chunk``. The divisions are the value
        boundaries of the index column used to define the partitions. For
        example, ``divisions=list('acegikmoqsuwz')`` could be used to partition
        a string column lexicographically into 12 partitions, with the implicit
        assumption that each partition contains similar numbers of records.
    npartitions : int
        Number of partitions, if ``divisions`` is not given. Will split the values
        of the index column linearly between ``limits``, if given, or the column
        max/min. The index column must be numeric or datetime for this to work.
    limits : 2-tuple or None
        Manually give upper and lower range of values for use with ``npartitions``;
        if None, first fetches max/min from the DB. Upper limit, if
        given, is inclusive.
    bytes_per_chunk : str or int
        If both ``divisions`` and ``npartitions`` are None, this is the target size of
        each partition, in bytes.
    head_rows : int
        How many rows to load for inferring the data types and the memory use per row.
    meta : empty DataFrame or None
        If provided, do not attempt to infer dtypes, but use these, coercing
        all chunks on load.
    engine_kwargs : dict or None
        Specific db engine parameters for sqlalchemy
    kwargs : dict
        Additional parameters to pass to `pd.read_sql()`

    Returns
    -------
    dask.dataframe

    See Also
    --------
    read_sql_table : Read SQL database table into a DataFrame.
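
    Examples
    --------
    An illustrative sketch (the table, column, and database names are
    hypothetical) that partitions an explicit SQLAlchemy ``select`` on a
    numeric index column, assuming the SQLAlchemy 1.4+ style of ``select()``:

    >>> import sqlalchemy as sa
    >>> metadata = sa.MetaData()
    >>> accounts = sa.Table(
    ...     'accounts', metadata,
    ...     sa.Column('id', sa.Integer),
    ...     sa.Column('amount', sa.Float),
    ... )
    >>> query = sa.select(accounts.c.id, accounts.c.amount)
    >>> ddf = dd.read_sql_query(query, 'sqlite:///path/to/bank.db',
    ...                         index_col=accounts.c.id,
    ...                         npartitions=4)  # doctest: +SKIP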
    """
    import sqlalchemy as sa

    if not isinstance(con, str):
        raise TypeError(
            "'con' must be of type str, not "
            + str(type(con))
            + "Note: Dask does not support SQLAlchemy connectables here"
        )

    if index_col is None:
        raise ValueError("Must specify index column to partition on")
    if not isinstance(index_col, (str, sa.Column, sa.sql.elements.ColumnClause)):
        raise ValueError(
            "'index_col' must be of type str or sa.Column, not " + str(type(index_col))
        )
    if head_rows == 0:
        if meta is None:
            raise ValueError("Must provide 'meta' if 'head_rows' is 0")
        if divisions is None and npartitions is None:
            raise ValueError(
                "Must provide 'divisions' or 'npartitions' if 'head_rows' is 0"
            )
    if divisions and npartitions:
        raise TypeError("Must supply either 'divisions' or 'npartitions', not both")

    engine_kwargs = {} if engine_kwargs is None else engine_kwargs
    engine = sa.create_engine(con, **engine_kwargs)

    index = (
        sa.Column(index_col)
        if isinstance(index_col, str)
        else sa.Column(index_col.name, index_col.type)
    )

    kwargs["index_col"] = index.name

    if head_rows > 0:
        # Fetch a small sample to infer dtypes and the memory footprint per row
        q = sql.limit(head_rows)
        head = pd.read_sql(q, engine, **kwargs)

        if len(head) == 0:
            # no results at all
            return from_pandas(head, npartitions=1)

        if pyarrow_strings_enabled():
            from dask.dataframe._pyarrow import (
                check_pyarrow_string_supported,
                to_pyarrow_string,
            )

            check_pyarrow_string_supported()
            head = to_pyarrow_string(head)

        bytes_per_row = head.memory_usage(deep=True, index=True).sum() / head_rows
        if meta is None:
            meta = head.iloc[:0]

    if divisions is None:
        if limits is None:
            # calculate max and min for given index
            q = sa.sql.select(
                sa.sql.func.max(index), sa.sql.func.min(index)
            ).select_from(sql.subquery())
            minmax = pd.read_sql(q, engine)
            maxi, mini = minmax.iloc[0]
            dtype = minmax.dtypes["max_1"]
        else:
            mini, maxi = limits
            dtype = pd.Series(limits).dtype

        if npartitions is None:
            # target partitions of roughly ``bytes_per_chunk`` bytes each
            q = sa.sql.select(sa.sql.func.count(index)).select_from(sql.subquery())
            count = pd.read_sql(q, engine)["count_1"][0]
            npartitions = (
                int(round(count * bytes_per_row / parse_bytes(bytes_per_chunk))) or 1
            )
        if dtype.kind == "M":
            divisions = methods.tolist(
                pd.date_range(
                    start=mini,
                    end=maxi,
                    freq="%iS" % ((maxi - mini).total_seconds() / npartitions),
                )
            )
            divisions[0] = mini
            divisions[-1] = maxi
        elif dtype.kind in ["i", "u", "f"]:
            divisions = np.linspace(mini, maxi, npartitions + 1, dtype=dtype).tolist()
        else:
            raise TypeError(
                'Provided index column is of type "{}".  If divisions is not provided the '
                "index column type must be numeric or datetime.".format(dtype)
            )

    parts = []
    lowers, uppers = divisions[:-1], divisions[1:]
    for i, (lower, upper) in enumerate(zip(lowers, uppers)):
        # the last partition includes its upper bound; earlier partitions do not
        cond = index <= upper if i == len(lowers) - 1 else index < upper
        q = sql.where(sa.sql.and_(index >= lower, cond))
        parts.append(
            delayed(_read_sql_chunk)(
                q, con, meta, engine_kwargs=engine_kwargs, **kwargs
            )
        )

    engine.dispose()

    return from_delayed(parts, meta, divisions=divisions)


def read_sql_table(
    table_name,
    con,
    index_col,
    divisions=None,
    npartitions=None,
    limits=None,
    columns=None,
    bytes_per_chunk="256 MiB",
    head_rows=5,
    schema=None,
    meta=None,
    engine_kwargs=None,
    **kwargs,
):
    """
    Read SQL database table into a DataFrame.

    If neither ``divisions`` nor ``npartitions`` is given, the memory footprint of the
    first few rows will be determined, and partitions of size ~256 MiB will
    be used.

    Parameters
    ----------
    table_name : str
        Name of SQL table in database.
    con : str
        Full sqlalchemy URI for the database connection
    index_col : str
        Column which becomes the index, and defines the partitioning. Should
        be an indexed column in the SQL server, and of any orderable type. If the
        type is numeric or datetime, then partition boundaries can be inferred from
        ``npartitions`` or ``bytes_per_chunk``; otherwise you must supply explicit
        ``divisions``.
    columns : sequence of str or SQLAlchemy column or None
        Which columns to select; if None, selects all. Note: this may be a mix of
        ``str`` and SQLAlchemy columns (see the second example below).
    schema : str or None
        Pass this to sqlalchemy to select which DB schema to use within the
        URI connection
    divisions : sequence
        Values of the index column to split the table by. If given, this will
        override ``npartitions`` and ``bytes_per_chunk``. The divisions are the value
        boundaries of the index column used to define the partitions. For
        example, ``divisions=list('acegikmoqsuwz')`` could be used to partition
        a string column lexicographically into 12 partitions, with the implicit
        assumption that each partition contains similar numbers of records.
    npartitions : int
        Number of partitions, if ``divisions`` is not given. Will split the values
        of the index column linearly between ``limits``, if given, or the column
        max/min. The index column must be numeric or datetime for this to work.
    limits : 2-tuple or None
        Manually give upper and lower range of values for use with ``npartitions``;
        if None, first fetches max/min from the DB. Upper limit, if
        given, is inclusive.
    bytes_per_chunk : str or int
        If both ``divisions`` and ``npartitions`` are None, this is the target size of
        each partition, in bytes.
    head_rows : int
        How many rows to load for inferring the data types and the memory use per row.
    meta : empty DataFrame or None
        If provided, do not attempt to infer dtypes, but use these, coercing
        all chunks on load.
    engine_kwargs : dict or None
        Specific db engine parameters for sqlalchemy
    kwargs : dict
        Additional parameters to pass to `pd.read_sql()`

    Returns
    -------
    dask.dataframe

    See Also
    --------
    read_sql_query : Read SQL query into a DataFrame.

    Examples
    --------
    >>> df = dd.read_sql_table('accounts', 'sqlite:///path/to/bank.db',
    ...                  npartitions=10, index_col='id')  # doctest: +SKIP
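
    A hypothetical sketch that selects only some columns, mixing plain strings
    with SQLAlchemy ``Column`` objects (the column names are illustrative):

    >>> import sqlalchemy as sa
    >>> ddf = dd.read_sql_table('accounts', 'sqlite:///path/to/bank.db',
    ...                  npartitions=10, index_col='id',
    ...                  columns=['id', sa.Column('amount', sa.Float)])  # doctest: +SKIP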
    """
    import sqlalchemy as sa
    from sqlalchemy import sql

    if "table" in kwargs:
        warnings.warn(
            "The `table` keyword has been replaced by `table_name`. Please use `table_name` instead.",
            DeprecationWarning,
        )
        table_name = kwargs.pop("table")
    if "uri" in kwargs:
        warnings.warn(
            "The `uri` keyword has been replaced by `con`. Please use `con` instead.",
            DeprecationWarning,
        )
        con = kwargs.pop("uri")
    if not isinstance(table_name, str):
        raise TypeError(
            "`table_name` must be of type str, not " + str(type(table_name))
        )
    if columns is not None:
        for col in columns:
            if not isinstance(col, (sa.Column, str)):
                raise TypeError(
                    "`columns` must be of type List[str], and cannot contain "
                    + str(type(col))
                )
    if not isinstance(con, str):
        raise TypeError(
            "`con` must be of type str, not "
            + str(type(con))
            + "Note: Dask does not support SQLAlchemy connectables here"
        )

    engine_kwargs = {} if engine_kwargs is None else engine_kwargs
    engine = sa.create_engine(con, **engine_kwargs)

    # Reflect the table so that column types are known up front
    m = sa.MetaData()
    if isinstance(table_name, str):
        table_name = sa.Table(table_name, m, autoload_with=engine, schema=schema)
    else:
        raise TypeError(
            "`table_name` must be of type str, not " + str(type(table_name))
        )
    engine.dispose()

    columns = (
        [
            (
                sa.Column(c, table_name.columns[c].type)
                if isinstance(c, str)
                else sa.Column(c.name, c.type)
            )
            for c in columns
        ]
        if columns
        else [sa.Column(c.name, c.type) for c in table_name.columns]
    )
    index = (
        sa.Column(index_col, table_name.columns[index_col].type)
        if isinstance(index_col, str)
        else sa.Column(index_col.name, index_col.type)
    )

    if index.name not in [c.name for c in columns]:
        columns.append(index)

    query = sql.select(*columns).select_from(table_name)

    return read_sql_query(
        sql=query,
        con=con,
        index_col=index,
        divisions=divisions,
        npartitions=npartitions,
        limits=limits,
        bytes_per_chunk=bytes_per_chunk,
        head_rows=head_rows,
        meta=meta,
        engine_kwargs=engine_kwargs,
        **kwargs,
    )


def read_sql(sql, con, index_col, **kwargs):
    """
    Read SQL query or database table into a DataFrame.

    This function is a convenience wrapper around ``read_sql_table`` and
    ``read_sql_query``. It will delegate to the specific function depending
    on the provided input. A SQL query will be routed to ``read_sql_query``,
    while a database table name will be routed to ``read_sql_table``.
    Note that the delegated function might have more specific notes about
    their functionality not listed here.

    Parameters
    ----------
    sql : str or SQLAlchemy Selectable
        Name of SQL table in database or SQL query to be executed. ``TextClause`` is not supported.
    con : str
        Full sqlalchemy URI for the database connection
    index_col : str
        Column which becomes the index, and defines the partitioning. Should
        be an indexed column in the SQL server, and of any orderable type. If the
        type is numeric or datetime, then partition boundaries can be inferred from
        ``npartitions`` or ``bytes_per_chunk``; otherwise you must supply explicit
        ``divisions``.

    Returns
    -------
    dask.dataframe

    See Also
    --------
    read_sql_table : Read SQL database table into a DataFrame.
    read_sql_query : Read SQL query into a DataFrame.
    """
    if isinstance(sql, str):
        return read_sql_table(sql, con, index_col, **kwargs)
    else:
        return read_sql_query(sql, con, index_col, **kwargs)


def _read_sql_chunk(q, uri, meta, engine_kwargs=None, **kwargs):
    import sqlalchemy as sa

    engine_kwargs = engine_kwargs or {}
    engine = sa.create_engine(uri, **engine_kwargs)
    df = pd.read_sql(q, engine, **kwargs)
    engine.dispose()
    if len(df) == 0:
        return meta
    elif len(meta.dtypes.to_dict()) == 0:
        # only index, no columns: nothing to coerce
        return df
    else:
        return df.astype(meta.dtypes.to_dict(), copy=False)


def _to_sql_chunk(d, uri, engine_kwargs=None, **kwargs):
    import sqlalchemy as sa

    engine_kwargs = engine_kwargs or {}
    engine = sa.create_engine(uri, **engine_kwargs)
    q = d.to_sql(con=engine, **kwargs)
    engine.dispose()

    return q


def to_sql(
    df,
    name: str,
    uri: str,
    schema=None,
    if_exists: str = "fail",
    index: bool = True,
    index_label=None,
    chunksize=None,
    dtype=None,
    method=None,
    compute=True,
    parallel=False,
    engine_kwargs=None,
):
    """Store Dask Dataframe to a SQL table

    An empty table is created based on the "meta" DataFrame (and conforming to the caller's "if_exists" preference), and
    then each block calls pd.DataFrame.to_sql (with `if_exists="append"`).

    Databases supported by SQLAlchemy [1]_ are supported. Tables can be
    newly created, appended to, or overwritten.

    Parameters
    ----------
    name : str
        Name of SQL table.
    uri : string
        Full sqlalchemy URI for the database connection
    schema : str, optional
        Specify the schema (if database flavor supports this). If None, use
        default schema.
    if_exists : {'fail', 'replace', 'append'}, default 'fail'
        How to behave if the table already exists.

        * fail: Raise a ValueError.
        * replace: Drop the table before inserting new values.
        * append: Insert new values to the existing table.

    index : bool, default True
        Write DataFrame index as a column. Uses `index_label` as the column
        name in the table.
    index_label : str or sequence, default None
        Column label for index column(s). If None is given (default) and
        `index` is True, then the index names are used.
        A sequence should be given if the DataFrame uses MultiIndex.
    chunksize : int, optional
        Specify the number of rows in each batch to be written at a time.
        By default, all rows will be written at once.
    dtype : dict or scalar, optional
        Specifying the datatype for columns. If a dictionary is used, the
        keys should be the column names and the values should be the
        SQLAlchemy types or strings for the sqlite3 legacy mode. If a
        scalar is provided, it will be applied to all columns.
    method : {None, 'multi', callable}, optional
        Controls the SQL insertion clause used:

        * None : Uses standard SQL ``INSERT`` clause (one per row).
        * 'multi': Pass multiple values in a single ``INSERT`` clause.
        * callable with signature ``(pd_table, conn, keys, data_iter)``.

        Details and a sample callable implementation can be found in the
        section :ref:`insert method <io.sql.method>`.
    compute : bool, default True
        When True, call ``dask.compute`` and perform the load into SQL; otherwise, return a Dask object (or a list of
        per-block objects when ``parallel=True``).
    parallel : bool, default False
        When True, have each block append itself to the DB table concurrently. This can result in DB rows being in a
        different order than the source DataFrame's corresponding rows. When False, load each block into the SQL DB in
        sequence.
    engine_kwargs : dict or None
        Specific db engine parameters for sqlalchemy

    Raises
    ------
    ValueError
        When the table already exists and `if_exists` is 'fail' (the
        default).

    See Also
    --------
    read_sql : Read a DataFrame from a table.

    Notes
    -----
    Timezone aware datetime columns will be written as
    ``Timestamp with timezone`` type with SQLAlchemy if supported by the
    database. Otherwise, the datetimes will be stored as timezone unaware
    timestamps local to the original timezone.

    .. versionadded:: 0.24.0

    References
    ----------
    .. [1] https://docs.sqlalchemy.org
    .. [2] https://www.python.org/dev/peps/pep-0249/

    Examples
    --------
    Create a table from scratch with 4 rows.

    >>> import pandas as pd
    >>> import dask.dataframe as dd
    >>> df = pd.DataFrame([ {'i':i, 's':str(i)*2 } for i in range(4) ])
    >>> ddf = dd.from_pandas(df, npartitions=2)
    >>> ddf  # doctest: +SKIP
    Dask DataFrame Structure:
                       i       s
    npartitions=2
    0              int64  object
    2                ...     ...
    3                ...     ...
    Dask Name: from_pandas, 2 tasks
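
    A hypothetical sketch of deferring the write and letting every block append
    concurrently; ``dask.compute`` then performs the actual inserts (the URI is
    illustrative):

    >>> out = ddf.to_sql('test', 'sqlite:///test.db',
    ...                  parallel=True, compute=False)  # doctest: +SKIP
    >>> import dask
    >>> dask.compute(out)  # doctest: +SKIP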

    >>> from dask.utils import tmpfile
    >>> from sqlalchemy import create_engine, text
    >>> with tmpfile() as f:
    ...     db = 'sqlite:///%s' % f
    ...     ddf.to_sql('test', db)
    ...     engine = create_engine(db, echo=False)
    ...     with engine.connect() as conn:
    ...         result = conn.execute(text("SELECT * FROM test")).fetchall()
    >>> result
    [(0, 0, '00'), (1, 1, '11'), (2, 2, '22'), (3, 3, '33')]
    """
    if not isinstance(uri, str):
        raise ValueError(f"Expected URI to be a string, got {type(uri)}.")

    # Keyword arguments shared by every per-chunk ``DataFrame.to_sql`` call
    kwargs = dict(
        name=name,
        uri=uri,
        engine_kwargs=engine_kwargs,
        schema=schema,
        if_exists=if_exists,
        index=index,
        index_label=index_label,
        chunksize=chunksize,
        dtype=dtype,
        method=method,
    )

    # Create the (empty) table from ``df._meta`` first, honouring ``if_exists``
    meta_task = delayed(_to_sql_chunk)(df._meta, **kwargs)

    # Partitions should always append to the empty table created from meta above
    worker_kwargs = dict(kwargs, if_exists="append")

    if parallel:
        # Perform the meta insert, then one insert per block, all concurrently
        result = [
            _extra_deps(
                d,
                extras=meta_task,
                **worker_kwargs,
                dask_key_name="to_sql-%s" % tokenize(d, **worker_kwargs),
            )
            for d in df.to_delayed()
        ]
    else:
        # Chain the meta insert and each block's insert so they run in order
        result = []
        last = meta_task
        for d in df.to_delayed():
            result.append(
                _extra_deps(
                    d,
                    extras=last,
                    **worker_kwargs,
                    dask_key_name="to_sql-%s" % tokenize(d, **worker_kwargs),
                )
            )
            last = result[-1]

    if compute:
        dask_compute(result)
    else:
        return result


@delayed
def _extra_deps(func, *args, extras=None, **kwargs):
    return func(*args, **kwargs)