
    
fy                     2   d Z ddlZddlZddlZddlZddlmZ ddlZddl	Z	ddl	m
Z
 ddlmZ ddlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ 	   eed          d          Zn# e $ r d ZY nw xY wd Z!d Z"d Z# G d de
j$                  Z% G d de%          Z&ed             Z' G d de%          Z( G d de)          Z* G d de%ej+                  Z, G d de%ej+                  Z-d*d Z.d*d!Z/d" Z0d# Z1d$Z2d% Z3d+d(Z4d) Z5dS ),z
A CUDA ND Array is recognized by checking the __cuda_memory__ attribute
on the object.  If it exists and evaluate to True, it must define shape,
strides, dtype and size attributes similar to a NumPy ndarray.
    N)c_void_p)_devicearray)devices)driver)typesconfig)to_fixed_tuple)
dummyarray)numpy_support)prepare_shape_strides_dtype)NumbaPerformanceWarning)warn	lru_cachec                     | S N )funcs    >lib/python3.11/site-packages/numba/cuda/cudadrv/devicearray.pyr   r      s        c                 $    t          | dd          S )z$Check if an object is a CUDA ndarray__cuda_ndarray__F)getattrobjs    r   is_cuda_ndarrayr   #   s    3*E222r   c                      t                       fd} |dt                      |dt                      |dt          j                    |dt                     dS )z,Verify the CUDA ndarray interface for an objc                     t          |           st          |           t          t          |           |          st          | d|          d S )Nz must be of type )hasattrAttributeError
isinstancer   )attrtypr   s     r   requires_attrz4verify_cuda_ndarray_interface.<locals>.requires_attr,   se    sD!! 	' &&&'#t,,c22 	H DDD##!FGGG	H 	Hr   shapestridesdtypesizeN)require_cuda_ndarraytuplenpr&   int)r   r#   s   ` r   verify_cuda_ndarray_interfacer,   (   s    H H H H H M'5!!!M)U###M'28$$$M&#r   c                 B    t          |           st          d          dS )z9Raises ValueError is is_cuda_ndarray(obj) evaluates Falsezrequire an cuda ndarray objectN)r   
ValueErrorr   s    r   r(   r(   8   s+    3 ;9:::; ;r   c                      e Zd ZdZdZdZddZed             ZddZ	ed             Z
dd	Zd
 Zed             Zed             Zej        dd            Zej        dd            ZddZd Zd ZddZd Zed             ZdS )DeviceNDArrayBasez$A on GPU NDArray representation
    Tr   Nc                    t          |t                    r|f}t          |t                    r|f}t          j        |          }t	          |          | _        t	          |          | j        k    rt          d          t          j        	                    d|||j
                  | _        t          |          | _        t          |          | _        || _        t          t          j        t"          j        | j        d                    | _        | j        dk    rw|[t)          j        | j        | j        | j        j
                  | _        t/          j                                        | j                  }nt)          j        |          | _        njt(          j        r t(          j                            d          }nt=          d          }t)          j        t/          j                    |d          }d| _        || _         || _!        dS )a5  
        Args
        ----

        shape
            array shape.
        strides
            array strides.
        dtype
            data type as np.dtype coercible object.
        stream
            cuda stream.
        gpu_data
            user provided device memory for the ndarray data buffer
        zstrides not match ndimr      N)contextpointerr'   )"r    r+   r*   r&   lenndimr.   r
   Array	from_descitemsize_dummyr)   r$   r%   	functoolsreduceoperatormulr'   _drivermemory_size_from_info
alloc_sizer   get_contextmemallocdevice_memory_sizeUSE_NV_BINDINGbindingCUdeviceptrr   MemoryPointergpu_datastream)selfr$   r%   r&   rJ   rI   nulls          r   __init__zDeviceNDArrayBase.__init__D   s     eS!! 	HEgs## 	!jGJJ	w<<49$$5666 &00E716A A5\\
W~~
	(tz1EEFF	9q==")"?Jdj.A#C #C".0099$/JJ")"<X"F"F % #22155{{,W5H5J5J59C C CHDO r   c                 `   t           j        r| j        t          | j                  }nd}n| j        j        | j        j        }nd}t          | j                  t          |           rd nt          | j                  |df| j	        j
        | j        dk    rt          | j                  nd ddS )Nr   F   )r$   r%   datatypestrrJ   version)r?   rE   device_ctypes_pointerr+   valuer)   r$   is_contiguousr%   r&   strrJ   )rK   ptrs     r   __cuda_array_interface__z*DeviceNDArrayBase.__cuda_array_interface__w   s    ! 		)5$455)/;06 4:&&,T22Kttdl8K8K%Lz~*.+*:*:c$+&&&
 
 	
r   c                 <    t          j         |           }||_        |S )zBind a CUDA stream to this object so that all subsequent operation
        on this array defaults to the given stream.
        )copyrJ   )rK   rJ   clones      r   bindzDeviceNDArrayBase.bind   s     	$r   c                 *    |                                  S r   	transposerK   s    r   TzDeviceNDArrayBase.T   s    ~~r   c                 T   |r4t          |          t          t          | j                            k    r| S | j        dk    rd}t          |          |Dt	          |          t	          t          | j                            k    rt          d|          ddlm}  ||           S )N   z2transposing a non-2D DeviceNDArray isn't supportedzinvalid axes list r   r^   )r)   ranger6   NotImplementedErrorsetr.   numba.cuda.kernels.transposer_   )rK   axesmsgr_   s       r   r_   zDeviceNDArrayBase.transpose   s     		#E$KK5ty)9)9#:#:::KY!^^FC%c***#d))s53C3C/D/D"D"D*tt=>>>>>>>>>9T??"r   c                     |s| j         n|S r   rJ   )rK   rJ   s     r   _default_streamz!DeviceNDArrayBase._default_stream   s    "(4t{{f4r   c                     d| j         v }| j        d         r|sd}n| j        d         r|sd}nd}t          j        | j                  }t          j        || j        |          S )n
        Magic attribute expected by Numba to get the numba type that
        represents this object.
        r   C_CONTIGUOUSCF_CONTIGUOUSFA)r%   flagsr   
from_dtyper&   r   r7   r6   )rK   	broadcastlayoutr&   s       r   _numba_type_zDeviceNDArrayBase._numba_type_   sw    ( %	:n% 	i 	FFZ' 		 	FFF(44{5$)V444r   c                     | j         :t          j        rt          j                            d          S t          d          S | j         j        S )z:Returns the ctypes pointer to the GPU data buffer
        Nr   )rI   r?   rE   rF   rG   r   rS   r`   s    r   rS   z'DeviceNDArrayBase.device_ctypes_pointer   sD     = % #221555{{"=66r   c                    |j         dk    rdS t          |            |                     |          }t          |           t          |          }}t	          j        |          r>t          |           t          ||           t	          j        | || j        |           dS t          j
        ||j        d         rdndd|j        d          	          }t          ||           t	          j        | || j        |           dS )
zCopy `ary` to `self`.

        If `ary` is a CUDA memory, perform a device-to-device transfer.
        Otherwise, perform a a host-to-device transfer.
        r   Nrk   ro   rp   rr   T	WRITEABLE)ordersubokrZ   )r'   sentry_contiguousrl   
array_corer?   is_device_memorycheck_array_compatibilitydevice_to_devicerA   r*   arrayrt   host_to_device)rK   aryrJ   	self_coreary_cores        r   copy_to_devicez DeviceNDArrayBase.copy_to_device   s    8q==F$%%f--(..
38	#C(( 	2c"""%i:::$T3OOOOOO x&_^<Ecc#!44	6 6 6H
 &i:::"44?*02 2 2 2 2 2r   c                 X   t          d | j        D                       r)d}t          |                    | j                            | j        dk    s
J d            |                     |          }|&t          j        | j        t          j                  }nt          | |           |}| j        dk    rt          j        || | j        |           |T| j        dk    r"t          j        | j        | j        |          }n't          j        | j        | j        | j        |	          }|S )
a^  Copy ``self`` to ``ary`` or create a new Numpy ndarray
        if ``ary`` is ``None``.

        If a CUDA ``stream`` is given, then the transfer will be made
        asynchronously as part as the given stream.  Otherwise, the transfer is
        synchronous: the function returns after the copy is finished.

        Always returns the host array.

        Example::

            import numpy as np
            from numba import cuda

            arr = np.arange(1000)
            d_arr = cuda.to_device(arr)

            my_kernel[100, 100](d_arr)

            result_array = d_arr.copy_to_host()
        c              3   "   K   | ]
}|d k     V  dS r   Nr   ).0ss     r   	<genexpr>z1DeviceNDArrayBase.copy_to_host.<locals>.<genexpr>	  s&      ++q1u++++++r   z2D->H copy not implemented for negative strides: {}r   zNegative memory sizeNr$   r&   rk   )r$   r&   buffer)r$   r&   r%   r   )anyr%   re   formatrA   rl   r*   emptybyter   r?   device_to_hostr'   ndarrayr$   r&   )rK   r   rJ   ri   hostarys        r   copy_to_hostzDeviceNDArrayBase.copy_to_host   s@   . ++dl+++++ 	@FC%cjj&>&>???!###%;###%%f--;hT_BGDDDGG%dC000G?a"7D$/*02 2 2 2 ;yA~~*4:TZ,35 5 5 *4:TZ-1\'K K Kr   c              #   0  K   |                      |          }| j        dk    rt          d          | j        d         | j        j        k    rt          d          t          t          j        t          | j
                  |z                      }| j        }| j        j        }t          |          D ]a}||z  }t          ||z   | j
                  }||z
  f}	| j                            ||z  ||z            }
t          |	|| j        ||
          V  bdS )zSplit the array into equal partition of the `section` size.
        If the array cannot be equally divided, the last section will be
        smaller.
        r2   zonly support 1d arrayr   zonly support unit strider&   rJ   rI   N)rl   r6   r.   r%   r&   r9   r+   mathceilfloatr'   rd   minrI   viewDeviceNDArray)rK   sectionrJ   nsectr%   r9   ibeginendr$   rI   s              r   splitzDeviceNDArrayBase.split!  s,     
 %%f--9>>4555<?dj1117888DIeDI..899::,:&u 	3 	3AKEegoty11C5[NE}))%(*:C(NKKHwdj)13 3 3 3 3 3 3	3 	3r   c                     | j         S )zEReturns a device memory object that is used as the argument.
        )rI   r`   s    r   as_cuda_argzDeviceNDArrayBase.as_cuda_arg6  s     }r   c                     t          j                                        | j                  }t	          | j        | j        | j                  }t          ||          S )z
        Returns a *IpcArrayHandle* object that is safe to serialize and transfer
        to another process to share the local allocation.

        Note: this feature is only available on Linux.
        )r$   r%   r&   )
ipc_handle
array_desc)	r   rB   get_ipc_handlerI   dictr$   r%   r&   IpcArrayHandle)rK   ipchdescs      r   r   z DeviceNDArrayBase.get_ipc_handle;  sO     "$$33DMBB$*dl$*MMM$????r   c                     | j                             |          \  }}t          |j        |j        | j        |                     |          | j                  S )a(  
        Remove axes of size one from the array shape.

        Parameters
        ----------
        axis : None or int or tuple of ints, optional
            Subset of dimensions to remove. A `ValueError` is raised if an axis
            with size greater than one is selected. If `None`, all axes with
            size one are removed.
        stream : cuda stream or 0, optional
            Default stream for the returned view of the array.

        Returns
        -------
        DeviceNDArray
            Squeezed view into the array.

        )axisr$   r%   r&   rJ   rI   )r:   squeezer   r$   r%   r&   rl   rI   )rK   r   rJ   	new_dummy_s        r   r   zDeviceNDArrayBase.squeezeF  s]    & {***55	1/%*''//]
 
 
 	
r   c                    t          j        |          }t          | j                  }t          | j                  }| j        j        |j        k    rp|                                 st          d          t          |d         | j        j        z  |j                  \  |d<   }|dk    rt          d          |j        |d<   t          |||| j
        | j                  S )zeReturns a new object by reinterpretting the dtype without making a
        copy of the data.
        zHTo change to a dtype of a different size, the array must be C-contiguousr   zuWhen changing to a larger dtype, its size must be a divisor of the total size in bytes of the last axis of the array.r   )r*   r&   listr$   r%   r9   is_c_contiguousr.   divmodr   rJ   rI   )rK   r&   r$   r%   rems        r   r   zDeviceNDArrayBase.viewb  s     TZ  t|$$:%.00''))  6  
 $b	DJ// NE"Is
 axx 6    .GBK;]
 
 
 	
r   c                 *    | j         j        | j        z  S r   )r&   r9   r'   r`   s    r   nbyteszDeviceNDArrayBase.nbytes  s    
 z"TY..r   r   r   r   Nr   )__name__
__module____qualname____doc____cuda_memory__r   rM   propertyrX   r\   ra   r_   rl   rx   rS   r   require_contextr   r   r   r   r   r   r   r   r   r   r   r0   r0   >   s        O1 1 1 1f 
 
 X
*        X 
# 
# 
# 
#5 5 5 5 5 X5< 	7 	7 X	7 2 2 2 2< , , , ,\3 3 3 3*  
	@ 	@ 	@
 
 
 
8#
 #
 #
J / / X/ / /r   r0   c                        e Zd ZdZd fd	Zed             Zed             Zej	        d             Z
ej	        dd            Zdd	Zej	        d
             Zej	        dd            ZddZ xZS )DeviceRecordz
    An on-GPU record type
    r   Nc                 h    d}d}t          t          |                               |||||           d S Nr   )superr   rM   )rK   r&   rJ   rI   r$   r%   	__class__s         r   rM   zDeviceRecord.__init__  sD    lD!!**5'5&+3	5 	5 	5 	5 	5r   c                 4    t          | j        j                  S z
        For `numpy.ndarray` compatibility. Ideally this would return a
        `np.core.multiarray.flagsobj`, but that needs to be constructed
        with an existing `numpy.ndarray` (as the C- and F- contiguous flags
        aren't writeable).
        r   r:   rt   r`   s    r   rt   zDeviceRecord.flags       DK%&&&r   c                 4    t          j        | j                  S )rn   )r   ru   r&   r`   s    r   rx   zDeviceRecord._numba_type_  s     '
333r   c                 ,    |                      |          S r   _do_getitemrK   items     r   __getitem__zDeviceRecord.__getitem__      %%%r   c                 .    |                      ||          S z0Do `__getitem__(item)` with CUDA stream
        r   rK   r   rJ   s      r   getitemzDeviceRecord.getitem       f---r   c                    |                      |          }| j        j        |         \  }}| j                            |          }|j        dk    rT|j        t          |||          S t          j	        d|          }t          j        |||j        |           |d         S t          |j        d |j        d         d          \  }}}	t          |||	||          S )	Nr   r   r2   r&   dstsrcr'   rJ   r   rp   r$   r%   r&   rI   rJ   )rl   r&   fieldsrI   r   r$   namesr   r*   r   r?   r   r9   r   subdtyper   )
rK   r   rJ   r"   offsetnewdatar   r$   r%   r&   s
             r   r   zDeviceRecord._do_getitem  s	   %%f--j'-V-$$V,,9??y$##f-46 6 6 6 (1C000&7,/L.46 6 6 6 1: ,CI,0,/LOSB B "E7E !ug',w(.0 0 0 0r   c                 .    |                      ||          S r   _do_setitemrK   keyrT   s      r   __setitem__zDeviceRecord.__setitem__      U+++r   c                 2    |                      |||          S z6Do `__setitem__(key, value)` with CUDA stream
        rk   r   rK   r   rT   rJ   s       r   setitemzDeviceRecord.setitem       U6:::r   c                    |                      |          }| }|r't          j                    }|                                }| j        j        |         \  }}| j                            |          } t          |           |||          }	t          |	j                            |          |          \  }
}t          j        |	|
|
j        j        |           |r|                                 d S d S )Nr   rk   )rl   r   rB   get_default_streamr&   r   rI   r   typeauto_devicer?   r   r9   synchronize)rK   r   rT   rJ   synchronousctxr"   r   r   lhsrhsr   s               r   r   zDeviceRecord._do_setitem  s    %%f--
 !j 	.%''C++--F j',V-$$V,,d4jjs6GDDD SY^^E226BBBQ 	 c39+=vFFF 	!     	! 	!r   r   r   )r   r   r   r   rM   r   rt   rx   r   r   r   r   r   r   r   r   __classcell__)r   s   @r   r   r     s+        5 5 5 5 5 5 ' ' X' 4 4 X4 & & & . . . .
0 0 0 00 , , , ; ; ; ;
! ! ! ! ! ! ! !r   r   c                 l     ddl m  dk    rj        d             }|S j         fd            }|S )z
    A separate method so we don't need to compile code every assignment (!).

    :param ndim: We need to have static array sizes for cuda.local.array, so
        bake in the number of dimensions into the kernel
    r   )cudac                     |d         | d<   d S r   r   )r   r   s     r   kernelz_assign_kernel.<locals>.kernel  s    "gCGGGr   c                                         d          }d}t          | j                  D ]}|| j        |         z  }||k    rd S j                            dft          j                  }t          dz
  dd          D ]N}|| j        |         z  |d|f<   || j        |         z  |j        |         dk    z  |d|f<   || j        |         z  }O|t          |d                            | t          |d                   <   d S )Nr2   rc   r   r   r   )	gridrd   r6   r$   localr   r   int64r	   )r   r   location
n_elementsr   idxr  r6   s         r   r  z_assign_kernel.<locals>.kernel  s    99Q<<
sx 	' 	'A#)A,&JJz!! F jd)+    taxR(( 	& 	&A 39Q</C1I!CIaL0SYq\A5EFC1I1%HH,/s1vt0L0L,MN3q64(()))r   )numbar  jit)r6   r  r  s   ` @r   _assign_kernelr    sx     qyy		 	 
		XN N N N N XN. Mr   c                       e Zd ZdZd Zed             Zd ZddZd Z	d Z
ddZej        d             Zej        dd            ZddZej        d             Zej        dd            ZddZdS )r   z
    An on-GPU array type
    c                     | j         j        S )zA
        Return true if the array is Fortran-contiguous.
        )r:   is_f_contigr`   s    r   is_f_contiguouszDeviceNDArray.is_f_contiguous&       {&&r   c                 4    t          | j        j                  S r   r   r`   s    r   rt   zDeviceNDArray.flags,  r   r   c                     | j         j        S )z;
        Return true if the array is C-contiguous.
        )r:   is_c_contigr`   s    r   r   zDeviceNDArray.is_c_contiguous6  r  r   Nc                     |r'|                                                      |          S |                                                                  S )zE
        :return: an `numpy.ndarray`, so copies to the host.
        )r   	__array__)rK   r&   s     r   r  zDeviceNDArray.__array__<  sJ      	3$$&&00777$$&&00222r   c                     | j         d         S r   )r$   r`   s    r   __len__zDeviceNDArray.__len__E  s    z!}r   c                    t          |          dk    r*t          |d         t          t          f          r|d         }t	          |           }|| j        k    r# || j        | j        | j        | j                  S  | j	        j
        |i |\  }}|| j	        j        gk    r# ||j        |j        | j        | j                  S t          d          )z
        Reshape the array without changing its contents, similarly to
        :meth:`numpy.ndarray.reshape`. Example::

            d_arr = d_arr.reshape(20, 50, order='F')
        r2   r   )r$   r%   r&   rI   operation requires copying)r5   r    r)   r   r   r$   r%   r&   rI   r:   reshapeextentre   )rK   newshapekwsclsnewarrextentss         r   r  zDeviceNDArray.reshapeH  s     x==A*Xa[5$-"H"H{H4jjtz!!3TZ!Z$-A A A A .$+-x?3??t{)***3V\6>!Z$-A A A A &&BCCCr   rp   r   c                    |                      |          }t          |           }| j                            |          \  }}|| j        j        gk    r$ ||j        |j        | j        | j        |          S t          d          )z
        Flattens a contiguous array without changing its contents, similar to
        :meth:`numpy.ndarray.ravel`. If the array is not contiguous, raises an
        exception.
        )r|   r   r  )
rl   r   r:   ravelr  r$   r%   r&   rI   re   )rK   r|   rJ   r   r!  r"  s         r   r$  zDeviceNDArray.ravel`  s     %%f--4jj+++%+88t{)***3V\6>!Z$-$& & & &
 &&BCCCr   c                 ,    |                      |          S r   r   r   s     r   r   zDeviceNDArray.__getitem__r  r   r   c                 .    |                      ||          S r   r   r   s      r   r   zDeviceNDArray.getitemv  r   r   c                    |                      |          }| j                            |          }t          |                                          }t          |           }t          |          dk    r | j        j        |d          }|j	        sh| j
        j        t          | j
        ||          S t          j        d| j
                  }t          j        ||| j        j        |           |d         S  ||j        |j        | j
        ||          S  | j        j        |j         } ||j        |j        | j
        ||          S )Nr2   r   r   r   r   r   )rl   r:   r   r   iter_contiguous_extentr   r5   rI   r   is_arrayr&   r   r   r*   r   r?   r   r9   r$   r%   r  )rK   r   rJ   arrr"  r   r   r   s           r   r   zDeviceNDArray._do_getitem|  sh   %%f--k%%d++s1133444jjw<<1(dm('!*5G< N:#/'dj18: : : : !hq
;;;G*wG040D28: : : : qz!sCK!%gfN N N N )dm(#*5G3SY!Z'&J J J Jr   c                 .    |                      ||          S r   r   r   s      r   r   zDeviceNDArray.__setitem__  r   r   c                 2    |                      |||          S r   r   r   s       r   r   zDeviceNDArray.setitem  r   r   c                    |                      |          }| }|r't          j                    }|                                }| j                            |          } | j        j        |j         }t          |t          j                  rd}d}	n|j        }|j        }	 t          |           ||	| j        ||          }
t!          ||d          \  }}|j        |
j        k    r t%          d|j        d|
j        d          t'          j        |
j        t&          j                  }|j        ||
j        |j        z
  d <    |j        | }t/          t1          |
j        |j                            D ])\  }\  }}|d	k    r||k    rt%          d
|||fz            *t3          j        t6          j        |
j        d	          } t;          |
j                                      ||          |
|           |r|                                 d S d S )Nr   r   T)rJ   user_explicitzCan't assign z-D array to z-D selfr   r2   zCCan't copy sequence with size %d to array axis %d with dimension %drk   ) rl   r   rB   r   r:   r   rI   r   r  r    r
   Elementr$   r%   r   r&   r   r6   r.   r*   onesr  r  	enumeratezipr;   r<   r=   r>   r  forallr   )rK   r   rT   rJ   r   r   r*  r   r$   r%   r   r   r   	rhs_shaper   lrr	  s                     r   r   zDeviceNDArray._do_setitem  s?   %%f--
 !j 	.%''C++--F k%%c**$$-$cj1c:-.. 	"EGGIEkGd4jj*   U6FFFQ8ch*    GCHBH555	*-)	#(SX%&&'ck9%"3sy#)#<#<== 	K 	KIAv1Avv!q&&  "=ABAqz"J K K K
 %hlCIqAA
Bsx  ''
6'BB3LLL 	!     	! 	!r   r   )rp   r   r   )r   r   r   r   r  r   rt   r   r  r  r  r$  r   r   r   r   r   r   r   r   r   r   r   r   r   "  sV        ' ' ' ' ' X'' ' '3 3 3 3  D D D0D D D D$ & & & . . . .
J J J J: , , , ; ; ; ;
5! 5! 5! 5! 5! 5!r   r   c                   0    e Zd ZdZd Zd Zd Zd Zd ZdS )r   a"  
    An IPC array handle that can be serialized and transfer to another process
    in the same machine for share a GPU allocation.

    On the destination process, use the *.open()* method to creates a new
    *DeviceNDArray* object that shares the allocation from the original process.
    To release the resources, call the *.close()* method.  After that, the
    destination can no longer use the shared array object.  (Note: the
    underlying weakref to the resource is now dead.)

    This object implements the context-manager interface that calls the
    *.open()* and *.close()* method automatically::

        with the_ipc_array_handle as ipc_array:
            # use ipc_array here as a normal gpu array object
            some_code(ipc_array)
        # ipc_array is dead at this point
    c                 "    || _         || _        d S r   )_array_desc_ipc_handle)rK   r   r   s      r   rM   zIpcArrayHandle.__init__  s    %%r   c                 ~    | j                             t          j                              }t	          dd|i| j        S )z
        Returns a new *DeviceNDArray* that shares the allocation from the
        original process.  Must not be used on the original process.
        rI   r   )r:  openr   rB   r   r9  )rK   dptrs     r   r<  zIpcArrayHandle.open  s?    
 $$W%8%:%:;;??d?d.>???r   c                 8    | j                                          dS )z5
        Closes the IPC handle to the array.
        N)r:  closer`   s    r   r?  zIpcArrayHandle.close  s     	     r   c                 *    |                                  S r   )r<  r`   s    r   	__enter__zIpcArrayHandle.__enter__   s    yy{{r   c                 .    |                                   d S r   )r?  )rK   r   rT   	tracebacks       r   __exit__zIpcArrayHandle.__exit__  s    

r   N)	r   r   r   r   rM   r<  r?  rA  rD  r   r   r   r   r     sl         $& & &@ @ @! ! !      r   r   c                       e Zd ZdZddZdS )MappedNDArrayz4
    A host array that uses CUDA mapped memory.
    r   c                 "    || _         || _        d S r   rI   rJ   rK   rI   rJ   s      r   device_setupzMappedNDArray.device_setup       r   Nr   r   r   r   r   rJ  r   r   r   rF  rF    2              r   rF  c                       e Zd ZdZddZdS )ManagedNDArrayz5
    A host array that uses CUDA managed memory.
    r   c                 "    || _         || _        d S r   rH  rI  s      r   rJ  zManagedNDArray.device_setup  rK  r   Nr   rL  r   r   r   rO  rO    rM  r   rO  c                 H    t          | j        | j        | j        ||          S )z/Create a DeviceNDArray object that is like ary.rJ   rI   )r   r$   r%   r&   )r   rJ   rI   s      r   from_array_likerS    s*    CK6"*, , , ,r   c                 0    t          | j        ||          S )z.Create a DeviceRecord object that is like rec.rR  )r   r&   )recrJ   rI   s      r   from_record_likerV  !  s    	&8DDDDr   c                     | j         r| j        s| S g }| j         D ],}|                    |dk    rdnt          d                     -| t	          |                   S )aG  
    Extract the repeated core of a broadcast array.

    Broadcast arrays are by definition non-contiguous due to repeated
    dimensions, i.e., dimensions with stride 0. In order to ascertain memory
    contiguity and copy the underlying data from such arrays, we must create
    a view without the repeated dimensions.

    r   N)r%   r'   appendslicer)   )r   
core_indexstrides      r   r   r   &  sn     ; ch 
J+ = =v{{!!d<<<<uZ  !!r   c                     | j         j        }t          t          | j                  t          | j                            D ]\  }}|dk    r|dk    r||k    r dS ||z  } dS )z
    Returns True iff `ary` is C-style contiguous while ignoring
    broadcasted and 1-sized dimensions.
    As opposed to array_core(), it does not call require_context(),
    which can be quite expensive.
    r2   r   FT)r&   r9   r2  reversedr$   r%   )r   r'   r$   r[  s       r   rU   rU   8  so     9DXci00(3;2G2GHH  v1991v~~uuEMD4r   zArray contains non-contiguous buffer and cannot be transferred as a single memory region. Please ensure contiguous buffer with numpy .ascontiguousarray()c                     t          |           }|j        d         s!|j        d         st          t                    d S d S )Nro   rq   )r   rt   r.   errmsg_contiguous_buffer)r   cores     r   r~   r~   N  sM    c??D:n% 3dj.H 312223 3 3 3r   TFc                 J   t          j        |           r| dfS t          | d          r!t          j                            |           dfS t          | t          j                  rt          | |          }n7t          j
        | dd          } t          |            t          | |          }|rrt          j        rO|sMt          | t                    s8t          | t          j                  rd}t#          t%          |                     |                    | |           |dfS )z
    Create a DeviceRecord or DeviceArray like obj and optionally copy data from
    host to device. If obj already represents device memory, it is returned and
    no copy is made.
    FrX   rk   T)rZ   r}   zGHost array used in CUDA kernel will incur copy overhead to/from device.)r?   r   r   r  r  as_cuda_arrayr    r*   voidrV  r   r~   rS  r   CUDA_WARN_ON_IMPLICIT_COPYr   r   r   r   r   )r   rJ   rZ   r.  devobjri   s         r   r   r   T  s>    $$ Ez	0	1	1 z'',,e33c27## 	9%c&999FF (  C c"""$S888F 
	60 7%7#C777 $C447
;C055666!!#f!555t|r   c                    |                                  |                                 }}| j        |j        k    rt          d| j        d|j                  |j        |j        k    rt	          d| j        d|j                  | j        r/|j        |j        k    r!t	          d| j        d|j                  d S d S )Nzincompatible dtype: z vs. zincompatible shape: zincompatible strides: )r   r&   	TypeErrorr$   r.   r'   r%   )ary1ary2ary1sqary2sqs       r   r   r   {  s    \\^^T\\^^FFzTZiTZZ1 2 2 	2|v|##j***djj2 3 3 	3 y 7V^v~55j,,,6 7 7 	77 755r   r   )r   TF)6r   r   r;   r=   rZ   ctypesr   numpyr*   r  r   numba.cuda.cudadrvr   r   r?   
numba.corer   r   numba.np.unsafe.ndarrayr	   
numba.miscr
   numba.npr   numba.cuda.api_utilr   numba.core.errorsr   warningsr   r   r   r   r   r,   r(   DeviceArrayr0   r   r  r   objectr   r   rF  rO  rS  rV  r   rU   r_  r~   r   r   r   r   r   <module>rx     sn                            & & & & & & 0 0 0 0 0 0 $ $ $ $ $ $ $ $ 2 2 2 2 2 2 ! ! ! ! ! ! " " " " " " ; ; ; ; ; ; 5 5 5 5 5 5      /	;//55II       3 3 3
   ; ; ;N/ N/ N/ N/ N/0 N/ N/ N/b
d! d! d! d! d!$ d! d! d!N ( ( (Vv! v! v! v! v!% v! v! v!r) ) ) ) )V ) ) )X    %rz       &
   , , , ,E E E E
" " "$   3 3 3 3$ $ $ $N7 7 7 7 7s    A6 6B B