
    
f6                        d dl mZ d dlmZ d dlmZmZmZmZm	Z	m
Z
 d dlmZmZmZmZmZmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZmZmZ d d	lmZ d d
l m!Z! d dl"m#Z# d Z$ G d de          Z% G d de          Z&d Z' edd           G d de                      Z( edd           G d de                      Z) G d de          Z*e	 	 	 d%d            Z+d Z,e	 	 d&d            Z-	 	 	 d'd Z.d! Z/d" Z0 G d# d$e1          Z2dS )(    )ir)ConcreteTemplate)typestypingfuncdescconfigcompilersigutils)sanitize_compile_result_entriesCompilerBaseDefaultPassBuilderFlagsOptionCompileResult)global_compiler_lock)LoweringPassPassManagerregister_pass)NumbaInvalidConfigWarning)IRLegalizationNativeLoweringAnnotateTypes)warn)get_current_device)CUDACABICallConvc                 <    | d S t          | t                    sJ | S N)
isinstancedict)xs    3lib/python3.11/site-packages/numba/cuda/compiler.py_nvvm_options_typer"      s)    yt !T"""""    c                   F    e Zd Z eedd          Z eedd          ZdS )	CUDAFlagsNzNVVM options)typedefaultdoczCompute Capability)__name__
__module____qualname__r   r"   nvvm_optionstuplecompute_capability r#   r!   r%   r%      sQ        6  L
     r#   r%   c                   $    e Zd Zed             ZdS )CUDACompileResultc                      t          |           S r   )idselfs    r!   entry_pointzCUDACompileResult.entry_point9   s    $xxr#   N)r)   r*   r+   propertyr6   r/   r#   r!   r1   r1   8   s-          X  r#   r1   c                  8    t          |           } t          di | S )Nr/   )r   r1   )entriess    r!   cuda_compile_resultr:   >   s#    -g66G''w'''r#   TF)mutates_CFGanalysis_onlyc                       e Zd ZdZd Zd ZdS )CUDABackendcuda_backendc                 .    t          j        |            d S r   r   __init__r4   s    r!   rB   zCUDABackend.__init__H       d#####r#   c           
          |d         }t          j        |j        g|j        R  }t	          |j        |j        |j        j        |j	        |j
        |j        ||j                  |_        dS )zH
        Back-end: Packages lowering output in a compile result
        cr)typing_contexttarget_contexttyping_errortype_annotationlibrarycall_helper	signaturefndescT)r   rL   return_typeargsr:   	typingctx	targetctxstatusfail_reasonrI   rJ   rK   rM   rE   )r5   stateloweredrL   s       r!   run_passzCUDABackend.run_passK   sr     +$U%6DDDD	& ? ?1!1M+>	
 	
 	
 tr#   N)r)   r*   r+   _namerB   rV   r/   r#   r!   r>   r>   C   s9         E$ $ $    r#   r>   c                   "    e Zd ZdZdZd Zd ZdS )CreateLibraryz
    Create a CUDACodeLibrary for the NativeLowering pass to populate. The
    NativeLowering pass will create a code library if none exists, but we need
    to set it up with nvvm_options from the flags if they are present.
    create_libraryc                 .    t          j        |            d S r   rA   r4   s    r!   rB   zCreateLibrary.__init__i   rC   r#   c                     |j                                         }|j        j        }|j        j        }|                    ||          |_        |j                                         dS )N)r,   T)	rQ   codegenfunc_idfunc_qualnameflagsr,   rZ   rJ   enable_object_caching)r5   rT   r]   namer,   s        r!   rV   zCreateLibrary.run_passl   s\    /))++}*{/..t,.OO++---tr#   N)r)   r*   r+   __doc__rW   rB   rV   r/   r#   r!   rY   rY   _   sC          E$ $ $    r#   rY   c                       e Zd Zd Zd ZdS )CUDACompilerc                    t           }t          d          }|                    | j                  }|j                            |j                   |                    | j                  }|j                            |j                   |                     | j                  }|j                            |j                   |                                 |gS )Ncuda)	r   r   define_untyped_pipelinerT   passesextenddefine_typed_pipelinedefine_cuda_lowering_pipelinefinalize)r5   dpbpmuntyped_passestyped_passeslowering_passess         r!   define_pipelineszCUDACompiler.define_pipelinesx   s       44TZ@@
	.///00<<
	,---<<TZHH
	/000
tr#   c                 Z   t          d          }|                    t          d           |                    t          d           |                    t          d           |                    t
          d           |                    t          d           |                                 |S )Ncuda_loweringz$ensure IR is legal prior to loweringzannotate typeszcreate libraryznative loweringzcuda backend)r   add_passr   r   rY   r   r>   rm   )r5   rT   ro   s      r!   rl   z*CUDACompiler.define_cuda_lowering_pipeline   s    ))
N:	< 	< 	<
M#3444 	M#3444
N$5666
K000
	r#   N)r)   r*   r+   rs   rl   r/   r#   r!   re   re   w   s2               r#   re   Nc	                    |t          d          ddlm}	 |	j        }
|	j        }t                      }d|_        d|_        d|_        |s|rd|_	        |rd|_
        |rd|_        nd|_        |rd|_        |rd|_        |r||_        ||_        ddlm}  |d	          5  t%          j        |
|| |||i t(          
          }d d d            n# 1 swxY w Y   |j        }|                                 |S )Nz#Compute Capability must be supplied   cuda_targetTpythonnumpyr   )target_overriderg   )rP   rQ   funcrO   rN   r`   localspipeline_class)
ValueError
descriptorrz   rF   rG   r%   
no_compileno_cpython_wrapperno_cfunc_wrapper	debuginfodbg_directives_onlyerror_modelforceinlinefastmathr,   r.   numba.core.target_extensionr}   r	   compile_extrare   rJ   rm   )pyfuncrN   rO   debuglineinfoinliner   r,   ccrz   rP   rQ   r`   r}   cresrJ   s                   r!   compile_cudar      s    
z>???''''''*I*IKKEE#E!E    )$(! $$# !   *)!E <;;;;;		 	  C C%	09+1+/2=,1-/5AC C CC C C C C C C C C C C C C C C lGKs   !"CCCc                    |j                             |j         d||          }|                    |           |j        }|j        }t          |           }|                    ||          }	| j                            |j        |          }
| 	                    d          }t          j        ||
|j                  }t          j        ||	|          }t          j        |                    d                    }|                     |          }|                    ||j                  }| j                            |||||          \  }}|                    |           |                    |           |                                 |S )z
    Wrap a Numba ABI function in a C ABI wrapper at the NVVM IR level.

    The C ABI wrapper will have the same name as the source Python function.
    
_function_)
entry_namer,   zcuda.cabi.wrapper )r]   rZ   rb   add_linking_libraryargtypesrestyper   get_function_type	call_convcreate_moduler   Functionllvm_func_name	IRBuilderappend_basic_blockget_arg_packerfrom_argumentsrO   call_functionretadd_ir_modulerm   )contextlibrM   wrapper_function_namer,   rJ   r   r   c_call_convwrapfntyfntywrapper_moduler~   wrapfnbuilderarginfocallargs_return_values                      r!   cabi_wrap_functionr      s    k((CH)@)@)@4I6B ) D DG $$$ HnG"7++K,,Wh??H..v~xHHD **+>??N;~tV-BCCD
 [3HIIFl644R8899G$$X..G%%gv{;;H '55w(4 4OA|KK.)))Nr#   numbac
           
         |dvrt          d|           |dk    r|st          d          |r |rd}
t          t          |
                     |	pt                      }	||rdndd}t	          j        |          \  }}|pt          j        }t          | |||||||	          }|j	        j
        }|r!|s|t          j        k    rt          d
          |j        }|rA|j        }|dk    r3|	                    d| j                  }t%          |||j        ||          }n=| j        }|j        }|j        }|                    |j        |j        |||||          \  }}|                    |          }||fS )a  Compile a Python function to PTX for a given set of argument types.

    :param pyfunc: The Python function to compile.
    :param sig: The signature representing the function's input and output
                types.
    :param debug: Whether to include debug info in the generated PTX.
    :type debug: bool
    :param lineinfo: Whether to include a line mapping from the generated PTX
                     to the source code. Usually this is used with optimized
                     code (since debug mode would automatically include this),
                     so we want debug info in the LLVM but only the line
                     mapping in the final PTX.
    :type lineinfo: bool
    :param device: Whether to compile a device function. Defaults to ``False``,
                   to compile global kernel functions.
    :type device: bool
    :param fastmath: Whether to enable fast math flags (ftz=1, prec_sqrt=0,
                     prec_div=, and fma=1)
    :type fastmath: bool
    :param cc: Compute capability to compile for, as a tuple
               ``(MAJOR, MINOR)``. Defaults to ``(5, 0)``.
    :type cc: tuple
    :param opt: Enable optimizations. Defaults to ``True``.
    :type opt: bool
    :param abi: The ABI for a compiled function - either ``"numba"`` or
                ``"c"``. Note that the Numba ABI is not considered stable.
                The C ABI is only supported for device functions at present.
    :type abi: str
    :param abi_info: A dict of ABI-specific options. The ``"c"`` ABI supports
                     one option, ``"abi_name"``, for providing the wrapper
                     function's name. The ``"numba"`` ABI has no options.
    :type abi_info: dict
    :return: (ptx, resty): The PTX code and inferred return type
    :rtype: tuple
    )r   czUnsupported ABI: r   z&The C ABI is not supported for kernelsz{debug=True with opt=True (the default) is not supported by CUDA. This may result in a crash - set debug=False or opt=False.   r   )r   opt)r   r   r   r,   r   z'CUDA kernel must have void return type.abi_name)r   )NotImplementedErrorr   r   r   r
   normalize_signaturer   CUDA_DEFAULT_PTX_CCr   rL   rN   r   void	TypeErrorrG   rJ   getr)   r   rM   __code__co_filenameco_firstlinenoprepare_cuda_kernelget_asm_str)r   sigr   r   devicer   r   r   abiabi_infomsgr,   rO   rN   r   restytgtr   wrapper_namecodefilenamelinenumkernelptxs                           r!   compile_ptxr      s   L .  !";c";";<<<
czz&z!"JKKK - -2 	&s++,,,!466H qqQ L
 !4S99D+		)v)BT!)H%1b: : :D N&E CV C 3 3ABBB

C 7l#::#<<
FODDL$S#t{L%13 3C #%--dlDK.6h.57 7V //R/
 
 C:r#   c	                 Z    t                      j        }	t          | ||||||	|||
  
        S )zCompile a Python function to PTX for a given set of argument types for
    the current device's compute capabilility. This calls :func:`compile_ptx`
    with an appropriate ``cc`` value for the current device.)r   r   r   r   r   r   r   r   )r   r.   r   )
r   r   r   r   r   r   r   r   r   r   s
             r!   compile_ptx_for_current_devicer   X  s?     
			0Bvs%($xBC3 3 3 3r#   c                 .    t          | ||          j        S r   ) declare_device_function_templatekeyrb   r   r   s      r!   declare_device_functionr   d  s    +D'8DDHHr#   c                 (  	 ddl m} |j        }|j        }t	          j        |g|R  	t          | 	           G 	fddt                    }t          j	        | ||          }|
                    |           |
                    |           |S )Nrx   ry   c                       e Zd Z ZgZdS )Bdeclare_device_function_template.<locals>.device_function_templateN)r)   r*   r+   r   cases)extfnr   s   r!   device_function_templater   o  s        r#   r   r   )r   rz   rF   rG   r   rL   ExternFunctionr   r   ExternalFunctionDescriptorinsert_user_function)
rb   r   r   rz   rP   rQ   r   rM   r   r   s
           @@r!   r   r   h  s    ''''''*I*I

7
.X
.
.
.C4%%E       #3    07X7 7 7F""5*BCCC""5&111##r#   c                       e Zd Zd ZdS )r   c                 "    || _         || _        d S r   )rb   r   )r5   rb   r   s      r!   rB   zExternFunction.__init__|  s    	r#   N)r)   r*   r+   rB   r/   r#   r!   r   r   {  s#            r#   r   )FFFFNN)FFFFNTr   N)FFFFTr   N)3llvmliter   numba.core.typing.templatesr   
numba.corer   r   r   r   r	   r
   numba.core.compilerr   r   r   r   r   r   numba.core.compiler_lockr   numba.core.compiler_machineryr   r   r   numba.core.errorsr   numba.core.typed_passesr   r   r   warningsr   numba.cuda.apir   numba.cuda.targetr   r"   r%   r1   r:   r>   rY   re   r   r   r   r   r   r   objectr   r/   r#   r!   <module>r      sy         8 8 8 8 8 8 J J J J J J J J J J J J J J J J0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 : 9 9 9 9 9G G G G G G G G G G 7 7 7 7 7 74 4 4 4 4 4 4 4 4 4       - - - - - - . . . . . .  
 
 
 
 
 
 
 
:       ( ( (
 4u555    ,   656 5666    L   76.    <   B BG<@7 7 7 7t) ) )X AFIMU U U Up GLEI9=	3 	3 	3 	3I I I$ $ $&    V     r#   