
    HR-eF                     b   d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	  G d d	e
          Z G d
 d          Zd Z G d de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  ZdS )zAn extensible HTML table reader and writer.

html.py:
  Classes to read and write HTML tables

`BeautifulSoup <http://www.crummy.com/software/BeautifulSoup/>`_
must be installed to read HTML tables.
    N)deepcopy)Column)writer   )corec                       e Zd ZdZd Zd ZdS )
SoupStringz8
    Allows for strings to hold BeautifulSoup data.
    c                 ,    t          j        | g|R i |S N)str__new__)clsargskwargss      5lib/python3.11/site-packages/astropy/io/ascii/html.pyr   zSoupString.__new__   s"    {30000000    c                     || _         d S r   )soup)selfvals     r   __init__zSoupString.__init__   s    			r   N)__name__
__module____qualname____doc__r   r    r   r   r	   r	      s<         1 1 1    r   r	   c                       e Zd ZdZd Zd ZdS )
ListWriterzD
    Allows for XMLWriter to write to a list instead of a file.
    c                     || _         d S r   )out)r   r    s     r   r   zListWriter.__init__%   s    r   c                 :    | j                             |           d S r   )r    append)r   datas     r   writezListWriter.write(   s    r   N)r   r   r   r   r   r$   r   r   r   r   r       s<               r   r   c                     | | j         dk    rdS d|vr|dk    S |d         }t          |t                    rd| j        v o| d         |k    S t          |t                    r||k    S dS )zb
    Checks whether the given BeautifulSoup tag is the table
    the user intends to process.
    NtableFtable_idr   id)name
isinstancer   attrsint)r   htmldictnumtabler'   s       r   identify_tabler/   ,   s    
 |tyG++u	8	#	#1}
#H(C   $tz!<d4jH&<<	Hc	"	" $8## 5r   c                       e Zd ZdZd ZdS )HTMLInputterz
    Input lines of HTML in a valid form.

    This requires `BeautifulSoup
    <http://www.crummy.com/software/BeautifulSoup/>`_ to be installed.
    c                    	 ddl m} n"# t          $ r t          j        d          w xY wd| j        vr_t          j                    5  t          j        dd            |d	                    |                    }ddd           n# 1 swxY w Y   n* |d	                    |          | j        d                   }|
                    d	          }t          |          D ]"\  }}t          || j        |d
z             r|} n[#t          | j        d         t                    rd| j        d          }nd| j        d          d}t          j        d| d          d |
                    d          D             }	|	S )zh
        Convert the given input into a list of SoupString rows
        for further processing.
        r   )BeautifulSoupz3BeautifulSoup must be installed to read HTML tablesparserignorez&.*no parser was explicitly specified.*
Nr&   r   r'   znumber zid ''zERROR: HTML table z
 not foundc                 ,    g | ]}t          |          S r   )r	   .0xs     r   
<listcomp>z.HTMLInputter.process_lines.<locals>.<listcomp>m   s    AAAqZ]]AAAr   tr)bs4r3   ImportErrorr   OptionalTableImportErrorhtmlwarningscatch_warningsfilterwarningsjoinfind_all	enumerater/   r*   r,   InconsistentTableError)
r   linesr3   r   tablesipossible_tabler&   	err_descr	soup_lists
             r   process_lineszHTMLInputter.process_linesI   s
   
	))))))) 	 	 	/E  	
 49$$(** 7 7'F   %}TYYu%5%5667 7 7 7 7 7 7 7 7 7 7 7 7 7 7 !=5!1!149X3FGGDw''!*6!2!2 	 	A~ndiQ?? & $)J/55 <=di
&;==		;49Z#8;;;	-:Y:::  
 BAENN4,@,@AAA	s   	 (4BBBN)r   r   r   r   rO   r   r   r   r1   r1   A   s-         & & & & &r   r1   c                       e Zd ZdZd ZdS )HTMLSplitterz 
    Split HTML table data.
    c              #   L  K   |D ]w}t          |t                    st          d          |j        }|                    d          }|rd |D             V  |                    d          }|rd |D             V  xt          |          dk    rt          j        d          dS )	z=
        Return HTML data from lines as a generator.
        'HTML lines should be of type SoupStringthc                     g | ]Q}|                     d           r!|j                                        |d          fn|j                                        RS colspan)has_attrtextstripr:   els     r   r<   z)HTMLSplitter.__call__.<locals>.<listcomp>   s`         {{9--)RW]]__bm44  r   tdc                 @    g | ]}|j                                         S r   )rY   rZ   r[   s     r   r<   z)HTMLSplitter.__call__.<locals>.<listcomp>   s"    ???2rw}}???r   r   z.HTML tables must contain data in a <table> tagN)r*   r	   	TypeErrorr   rF   lenr   rH   )r   rI   liner   header_elementsdata_elementss         r   __call__zHTMLSplitter.__call__w   s        	@ 	@DdJ// K IJJJ9D"mmD11O   .	      !MM$//M @????????u::??-@   ?r   N)r   r   r   r   rd   r   r   r   rQ   rQ   r   s-             r   rQ   c                        e Zd ZdZ ej        e           ej        e           ej        e          gZ	 fdZ
 xZS )HTMLOutputterz
    Output the HTML data as an ``astropy.table.Table`` object.

    This subclass allows for the final table to contain
    multidimensional columns (defined using the colspan attribute
    of <th>).
    c                    g }d}|t          |          k     r||         }t          |d          rp||||j        z            }t          j        |j                  }t          t          d |D                        |_        |	                    |           ||j        z  }n|	                    |           |dz  }|t          |          k     t                                          ||          S )z?
        Process the data in multidimensional columns.
        r   rW   c                     g | ]	}|j         
S r   )str_valsr9   s     r   r<   z*HTMLOutputter.__call__.<locals>.<listcomp>   s    -L-L-LQaj-L-L-Lr   r   )r`   hasattrrW   r   r   r)   listzipri   r"   superrd   )	r   colsmetanew_colscol_numcol	span_colsnew_col	__class__s	           r   rd   zHTMLOutputter.__call__   s     D		!!w-CsI&& 	 7S[+@!@A	+ch//#'-L-L)-L-L-L(M#N#N (((3;&$$$1 D		!! ww$///r   )r   r   r   r   r   convert_numpyr,   floatr   default_convertersrd   __classcell__ru   s   @r   rf   rf      s|          	35!!30 0 0 0 0 0 0 0 0r   rf   c                       e Zd ZeZd Zd ZdS )
HTMLHeaderc                     t          |          D ];\  }}t          |t                    st          d          |j        }|j        |c S <dS )zE
        Return the line number at which header data begins.
        rS   N)rG   r*   r	   r_   r   rT   r   rI   rK   ra   r   s        r   
start_linezHTMLHeader.start_line   se     !'' 	 	GAtdJ// K IJJJ9Dw" # tr   c                    g | _         g }| j        D ],}t          |t                    rt	          j        |d                   }t          |d                   |_        | j                             |           |                    |d                    t          dt          |d                             D ]C}| j                             t	          j        d                     |                    d           D| j                             t	          j        |                     |                    |           .|| _        dS )zU
        Set columns from header names, handling multicolumns appropriately.
        r   )r)   r    N)
rn   namesr*   tupler   r   r,   rW   r"   range)r   	new_namesr)   rr   rK   s        r   _set_cols_from_nameszHTMLHeader._set_cols_from_names   s,    		J 	' 	'D$&& 'ktAw///!$q'll	  %%%  a)))q#d1g,,// ) )AI$$T[__555$$R(((()
 	  $!7!7!7888  &&&&


r   N)r   r   r   rQ   splitter_classr   r   r   r   r   r|   r|      s7        !N      r   r|   c                       e Zd ZeZd Zd ZdS )HTMLDatac                     t          |          D ]V\  }}t          |t                    st          d          |j        }|j        |j        t          j        d          |c S Wt          j        d          )zD
        Return the line number at which table data begins.
        rS   Nz9HTML tables cannot have headings and data in the same rowz!No start line found for HTML data)	rG   r*   r	   r_   r   r]   rT   r   rH   r~   s        r   r   zHTMLData.start_line   s     !'' 
	 
	GAtdJ// K IJJJ9Dw"7&5S    # )*MNNNr   c                     d}t          |          D ]9\  }}t          |t                    st          d          |j        }|j        |}:|dk    rdS |dz   S )zB
        Return the line number at which table data ends.
        rS   Nr   )rG   r*   r	   r_   r   r]   )r   rI   
last_indexrK   ra   r   s         r   end_linezHTMLData.end_line   su     
 '' 	 	GAtdJ// K IJJJ9Dw"
4A~r   N)r   r   r   rQ   r   r   r   r   r   r   r   r      s:        !NO O O$    r   r   c                   ^     e Zd ZdZdZdgZdZdZeZ	e
ZeZdZi f fd	Z fdZd Zd	 Z xZS )
HTMLaq
  HTML format table.

    In order to customize input and output, a dict of parameters may
    be passed to this class holding specific customizations.

    **htmldict** : Dictionary of parameters for HTML input/output.

        * css : Customized styling
            If present, this parameter will be included in a <style>
            tag and will define stylistic attributes of the output.

        * table_id : ID for the input table
            If a string, this defines the HTML id of the table to be processed.
            If an integer, this specifies the index of the input table in the
            available tables. Unless this parameter is given, the reader will
            use the first table found in the input file.

        * multicol : Use multi-dimensional columns for output
            The writer will output tuples as elements of multi-dimensional
            columns if this parameter is true, and if not then it will
            use the syntax 1.36583e-13 .. 1.36583e-13 for output. If not
            present, this parameter will be true by default.

        * raw_html_cols : column name or list of names with raw HTML content
            This allows one to include raw HTML content in the column output,
            for instance to include link references in a table.  This option
            requires that the bleach package be installed.  Only whitelisted
            tags are allowed through for security reasons (see the
            raw_html_clean_kwargs arg).

        * raw_html_clean_kwargs : dict of keyword args controlling HTML cleaning
            Raw HTML will be cleaned to prevent unsafe HTML from ending up in
            the table output.  This is done by calling ``bleach.clean(data,
            **raw_html_clean_kwargs)``.  For details on the available options
            (e.g. tag whitelist) see:
            https://bleach.readthedocs.io/en/latest/clean.html

        * parser : Specific HTML parsing library to use
            If specified, this specifies which HTML parsing library
            BeautifulSoup should use as a backend. The options to choose
            from are 'html.parser' (the standard library parser), 'lxml'
            (the recommended parser), 'xml' (lxml's XML parser), and
            'html5lib'. html5lib is a highly lenient parser and therefore
            might work correctly for unusual input if a different parser
            fails.

        * jsfiles : list of js files to include when writing table.

        * cssfiles : list of css files to include when writing table.

        * js : js script to include in the body when writing table.

        * table_class : css class for the table

    rA   z.htmlz
HTML table   c                     t                                                       t          |          | _        d|vr
d| j        d<   d|vr
d| j        d<   | j        | j        _        dS )zB
        Initialize classes for HTML reading and writing.
        multicolTr'   r   N)rm   r   r   rA   inputter)r   r-   ru   s     r   r   zHTML.__init__J  sj     	X&&	X%%$(DIj!X%%$%DIj!!Yr   c                 l    t                      | _        t                                          |          S )zU
        Read the ``table`` in HTML format and return a resulting ``Table``.
        )rf   	outputterrm   read)r   r&   ru   s     r   r   z	HTML.readV  s&     'ww||E"""r   c                    |                      |           t          |j                                                  }|| j        j        _        || j        _        t          | j        j        t                    r| j        j        g| j        _        | j        
                    |           | j                                         g }| j                            dg           t          t                    rgfd|D             }| j                            di           }t          j        t#          |                    }|                    d          5  |                    d          5  |                    dddi	          5  	 d
d
d
           n# 1 swxY w Y   |                    dddd	          5  	 d
d
d
           n# 1 swxY w Y   d| j        v rM|                    d          5  |                    | j        d                    d
d
d
           n# 1 swxY w Y   d| j        v rB| j        d         D ]4}|                    dd|d          5  	 d
d
d
           n# 1 swxY w Y   5d| j        v rT| j        d         D ]F}|                    d|          5  |                    d           d
d
d
           n# 1 swxY w Y   Gd
d
d
           n# 1 swxY w Y   |                    d          5  d| j        v rz|                    d          5  |                    d          5  |                    | j        d                    d
d
d
           n# 1 swxY w Y   d
d
d
           n# 1 swxY w Y   t          | j        d         t                    r| j        d         }nd
}d| j        v r| j        d         }	d|	i}
ni }
|                    d||
           5  |                    d!          5  |                    d"          5  |D ]}t)          |j                  d#k    r0| j        d$         r#|                    d%|j        d#         &           n|                    d%           |                    |j        j                                                   |                    d'(           	 d
d
d
           n# 1 swxY w Y   g }g }g }t7          ||          D ])\  }}t)          |j                  d#k    r| j        d$         r|j        d#         }t9          |          D ]t;          fd)|D                       }|                     ||j                                                  }|                    |           |                    |           |                    |           |                     ||j                                                  }|                    |           |                    |           +	 d
d
d
           n# 1 swxY w Y   t7          | D ]}|                    d"          5  t7          ||          D ]\  }}|rd*nd+} |j        |fi |5  |                    d,           |                    |                                           |                    d'(           d
d
d
           n# 1 swxY w Y   	 d
d
d
           n# 1 swxY w Y   	 d
d
d
           n# 1 swxY w Y   d
d
d
           n# 1 swxY w Y   d
d
d
           n# 1 swxY w Y   d                     |          gS )-zR
        Return data in ``table`` converted to HTML as a list of strings.
        raw_html_colsc                 ,    g | ]}|j         j        vS r   )infor)   )r:   rr   r   s     r   r<   zHTML.write.<locals>.<listcomp>u  s"    KKKs]:KKKr   raw_html_clean_kwargsrA   headro   charsetzutf-8)attribNzContent-typeztext/html;charset=UTF-8)z
http-equivcontentcssstylecssfileslink
stylesheetztext/css)relhreftypejsfilesscript)srcr   bodyjsnoner'   table_classclassr&   )r(   r   theadr=   r   r   rT   rV   F)indentc                      g | ]
}|         S r   r   )r:   r\   rK   s     r   r<   zHTML.write.<locals>.<listcomp>  s    5J5J5Jbe5J5J5Jr   
escape_xmlbleach_cleanr]   )!_check_multidim_tablerk   columnsvaluesr#   headerrn   r*   fill_valuesr   _set_fill_values_set_col_formatsrA   getr   r   	XMLWriterr   tagxml_cleaning_methodr`   shapestartr   r)   rZ   endrl   r   r   iter_str_valsr"   rE   )r   r&   rn   rI   cols_escapedr   wfilenamehtml_table_idhtml_table_classr   rr   col_str_itersnew_cols_escapedrp   col_escapedspanrt   new_col_iter_str_valscol_iter_str_valsrowr\   methodrK   r   s                          @@r   r$   z
HTML.write]  sM   
 	""5)))EM((**++ $		di+U33 	<%)Y%:$;DI!	""4(((	""$$$ 	or::mS)) 	,*OMKKKKdKKK !%	.Er J J Z..//UU6]] ]	8 ]	8v ' 'UU69g*>U??                UU&4#<                      DI%%w 1 1ty/0001 1 1 1 1 1 1 1 1 1 1 1 1 1 1**$(Ij$9 ! !UU"8* #   ! ! !! ! ! ! ! ! ! ! ! ! ! ! ! ! ! 	))$(Ii$8 ' 'UU8U:: ' 'FF2JJJ' ' ' ' ' ' ' ' ' ' ' ' ' ' '/' ' ' ' ' ' ' ' ' ' ' ' ' ' '4 v B8 B849$$..v66 4 4UU8__ 4 4FF49T?3334 4 4 4 4 4 4 4 4 4 4 4 4 4 44 4 4 4 4 4 4 4 4 4 4 4 4 4 4 di
3S99 )$(Ij$9MM$(M DI--'+y'?$%'78FFFUU7}VUDD 48 48w 'E 'EUU4[[ 4 4'+ 4 4#&sy>>A#5#5$)J:O#5$%GGD#)A,G$G$G$G$G$%GGDMMM !sx}':':'<'< = = = !U 3 3 3 344 4 4 4 4 4 4 4 4 4 4 4 4 4 4 )++-( $&03D,0G0G E E,C"39~~11di
6K1'*y|).t 	!= 	!=A.45J5J5J5Jc5J5J5J.K.KG<@<L<L(+W\-G-G-I-I=& =&$9 %2$8$89N$O$O$O$4$;$;K$H$H$H$,OOG$<$<$<$<	!= 594D4D$')?)?)A)A5" 5" 1 !. 4 45F G G G 0 7 7 D D D D'E)'E 'E 'E 'E 'E 'E 'E 'E 'E 'E 'E 'E 'E 'E 'ER  #M2 
8 
8UU4[[ 	8 	836s<L3M3M 8 8K9D)X.%:Q%:$*&" &".C&" &" !8 !8 %&GGDMMM$%FF288::$6$6$6$%EEE$7$7$7!8 !8 !8 !8 !8 !8 !8 !8 !8 !8 !8 !8 !8 !8 !88	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8
8U48 48 48 48 48 48 48 48 48 48 48 48 48 48 48B8 B8 B8 B8 B8 B8 B8 B8 B8 B8 B8 B8 B8 B8 B87]	8 ]	8 ]	8 ]	8 ]	8 ]	8 ]	8 ]	8 ]	8 ]	8 ]	8 ]	8 ]	8 ]	8 ]	8@ s  \7.KF
KFKFK;G	=K	GKG!K2!HKH##K&H#'4KI)	K)I--K0I-13K$K	:KK

KK
K\7K"	"\7%K"	&\7>\ M73!M 	M7 M$$M7'M$(M7+\ 7M;;\ >M;?A(\ '\	=X#	B*S
>X#	
SX#	SEX#	\	#X''\	*X'+#\	*[08A[[0[[0[[0$\	0[44\	7[48\	=\ 	\\ \\ \7 \$	$\7'\$	(\77\;>\;c              #     K   t          |d          }t          |d          }t          |          D ]O\  }}|r)|r'|j        |         r|j        t          j                 V  0|r||j        v r|j        |         V  K|V  PdS )zZ
        Return an iterator of the values with replacements based on fill_values.
        maskr   N)rj   rG   r   r   r   masked)r   rr   r   is_masked_columnhas_fill_valuesidxcol_strs          r   r   zHTML.fill_values  s      
 #3//!#}55%m44 	 	LC O 8C= /$+6666 co--/'2222MMMM	 	r   )r   r   r   r   _format_name_io_registry_format_aliases_io_registry_suffix_descriptionr|   header_classr   
data_classr1   inputter_classmax_ndimr   r   r$   r   ry   rz   s   @r   r   r     s        6 6p L#)(!LLJ!NH " 
' 
' 
' 
' 
' 
'# # # # #@  @  @ D      r   r   )r   rB   copyr   astropy.tabler   astropy.utils.xmlr   r   r   r   r	   r   r/   BaseInputterr1   BaseSplitterrQ   TableOutputterrf   
BaseHeaderr|   BaseDatar   
BaseReaderr   r   r   r   <module>r      s                       $ $ $ $ $ $      	 	 	 	 	 	 	 		 	 	 	 	 	 	 	  *. . . . .4$ . . .b    4$   >#0 #0 #0 #0 #0D' #0 #0 #0L% % % % % % % %P$ $ $ $ $t} $ $ $Nl l l l l4? l l l l lr   