
    Qd'                         d Z ddlZddlZddlZddlmZ dZd Zd Zd Z	d Z
d	 Z	 ddZ ed          dd            ZddZ	 	 	 ddZd Zd ZdS )zu
Utility functions for the `twitterclient` module which do not require
the `twython` library to have been installed.
    N)
deprecated.c                 ~    g }|D ]7}	 t          | ||           # t          $ r}t          d|          |d}~ww xY w|S )z
    Extract field values from a full tweet and return them as a list

    :param json tweet: The tweet in JSON format
    :param list fields: The fields to be extracted from the tweet
    :rtype: list(str)
    z6Fatal error when extracting fields. Cannot find field N)_add_field_to_out	TypeErrorRuntimeError)tweetfieldsoutfieldes        3lib/python3.11/site-packages/nltk/twitter/common.pyextract_fieldsr      sr     C  	eUC0000 	 	 	H% 	 Js   
:5:c                     t          |          r+t          |          \  }}t          | |         ||           d S || |         gz  }d S N)_is_composed_key_get_key_value_composedr   )jsonr   r   keyvalues        r   r   r   )   sS     ,U33
U$s)UC00000U}    c                     t           | v S r   )HIER_SEPARATOR)r   s    r   r   r   1   s    U""r   c                     |                      t                    }|d         }t                              |dd                    }||fS )Nr      )splitr   join)r   r   r   r   s       r   r   r   5   sA    
++n
%
%C
a&CABB((E:r   c                 "   | sd S t          | t                    rC | j                    D ]1\  }}||k    r|c S |dk    s|dk    rt          ||          }||c S 2d S t          | t                    r| D ]}t          ||          }||c S d S d S )Nentitiesextended_entities)
isinstancedictitems_get_entity_recursivelist)r   entityr   r   	candidateitems         r   r$   r$   =   s     t	D$		 $$*,, 
	% 
	%JCf}}
 j  C+>$>$>1%@@	($$$$t	D$		  	! 	!D-dF;;I$     %ttr   utf8replaceFc                     t          ||||          \  }}|                    |           | D ];}t          j        |          }	t	          |	|          }
|                    |
           <|                                 dS )a;  
    Extract selected fields from a file of line-separated JSON tweets and
    write to a file in CSV format.

    This utility function allows a file of full tweets to be easily converted
    to a CSV file for easier processing. For example, just TweetIDs or
    just the text content of the Tweets can be extracted.

    Additionally, the function allows combinations of fields of other Twitter
    objects (mainly the users, see below).

    For Twitter entities (e.g. hashtags of a Tweet), and for geolocation, see
    `json2csv_entities`

    :param str infile: The name of the file containing full tweets

    :param str outfile: The name of the text file where results should be    written

    :param list fields: The list of fields to be extracted. Useful examples    are 'id_str' for the tweetID and 'text' for the text of the tweet. See    <https://dev.twitter.com/overview/api/tweets> for a full list of fields.    e. g.: ['id_str'], ['id', 'text', 'favorite_count', 'retweet_count']    Additionally, it allows IDs from other Twitter objects, e. g.,    ['id', 'text', 'user.id', 'user.followers_count', 'user.friends_count']

    :param error: Behaviour for encoding errors, see    https://docs.python.org/3/library/codecs.html#codec-base-classes

    :param gzip_compress: if `True`, output files are compressed with gzip
    N)_outf_writerwriterowr   loadsr   close)fpoutfiler
   encodingerrorsgzip_compresswriteroutfliner	   rows              r   json2csvr9   W   s    D "'8V]KKNVT
OOF  
4  UF++JJLLLLLr   z-Use open() and csv.writer() directly instead.c                 &    t          | |||          S )z+Get a CSV writer with optional compression.)r,   )r1   r2   r3   r4   s       r   outf_writer_compatr;      s     6=AAAr   c                     |rt          j        | dd||          }nt          | dd||          }t          j        |          }||fS )Nwt )newliner2   r3   w)gzipopencsvr5   )r1   r2   r3   r4   r6   r5   s         r   r,   r,      sX     Py$XfUUUGS"xOOOZFD>r   c                    t          ||||          \  }}	t          |||          }
|                    |
           | D ]}t          j        |          }t          |          rXt          |          \  }}t          ||          }|sJt          ||          }t          ||          }t          ||||           }t          ||          }t          ||          }t          ||||           |	
                                 dS )a  
    Extract selected fields from a file of line-separated JSON tweets and
    write to a file in CSV format.

    This utility function allows a file of full Tweets to be easily converted
    to a CSV file for easier processing of Twitter entities. For example, the
    hashtags or media elements of a tweet can be extracted.

    It returns one line per entity of a Tweet, e.g. if a tweet has two hashtags
    there will be two lines in the output file, one per hashtag

    :param tweets_file: the file-like object containing full Tweets

    :param str outfile: The path of the text file where results should be        written

    :param list main_fields: The list of fields to be extracted from the main        object, usually the tweet. Useful examples: 'id_str' for the tweetID. See        <https://dev.twitter.com/overview/api/tweets> for a full list of fields.
        e. g.: ['id_str'], ['id', 'text', 'favorite_count', 'retweet_count']
        If `entity_type` is expressed with hierarchy, then it is the list of        fields of the object that corresponds to the key of the entity_type,        (e.g., for entity_type='user.urls', the fields in the main_fields list        belong to the user object; for entity_type='place.bounding_box', the        files in the main_field list belong to the place object of the tweet).

    :param list entity_type: The name of the entity: 'hashtags', 'media',        'urls' and 'user_mentions' for the tweet object. For a user object,        this needs to be expressed with a hierarchy: `'user.urls'`. For the        bounding box of the Tweet location, use `'place.bounding_box'`.

    :param list entity_fields: The list of fields to be extracted from the        entity. E.g. `['text']` (of the Tweet)

    :param error: Behaviour for encoding errors, see        https://docs.python.org/3/library/codecs.html#codec-base-classes

    :param gzip_compress: if `True`, output files are compressed with gzip
    N)r,   get_header_field_listr-   r   r.   r   r   r$   r   _write_to_filer/   )tweets_filer1   main_fieldsentity_typeentity_fieldsr2   r3   r4   r5   r6   headerr7   r	   r   r   object_jsonobject_fieldsr#   tweet_fieldss                      r   json2csv_entitiesrO      s   d "'8V]KKNVT";]KKF
OOF G G
4  K(( 	G0==JC/s;;K *;DDM)+u==E=%GGGG)%==L)%==E<vFFFFJJLLLLLr   c                     t          |          rt          |          \  }}||nd |rfd| D             }n| }fd|D             }||z   S )Nc                 H    g | ]}t                               |g          S  r   r   ).0xmain_entitys     r   
<listcomp>z)get_header_field_list.<locals>.<listcomp>   s,    NNNQ>&&Q'788NNNr   c                 H    g | ]}t                               |g          S rR   rS   )rT   rU   
sub_entitys     r   rW   z)get_header_field_list.<locals>.<listcomp>   s+    KKK~""J?33KKKr   )r   r   )	rH   rI   rJ   r   r   output1output2rV   rY   s	          @@r   rE   rE      s    $$ !,[99
U

 
 NNNN+NNNKKKK]KKKGWr   c                    |sd S t          |t                    r| }d |D             }d |D             }|D ]+}||         }t          |t                    r||z  }%||gz  },|D ]_}	t          |	          \  }
}||
         }t          |t                    s"t	          d                    |
                    |||         gz  }`|                    |           d S |D ]*}| t          ||          z   }|                    |           +d S )Nc                 0    g | ]}t          |          |S rR   r   rT   rU   s     r   rW   z"_write_to_file.<locals>.<listcomp>   s'    SSSQ?OPQ?R?RSqSSSr   c                 0    g | ]}t          |          |S rR   r^   r_   s     r   rW   z"_write_to_file.<locals>.<listcomp>   s&     Q Q Qq=Ma=P=P Q Q Q Qr   zEKey {} does not contain a dictionary
                in the json file)r!   r"   r%   r   r   formatr-   r   )rM   r#   rJ   r5   r8   entity_field_valuesentity_field_composedr   r   dkdvd	json_dictr(   s                 r   rF   rF      sc     	% SS-SSS Q QM Q Q Q( 	 	E%LE%&& uw& 
	# 
	#A,Q//FBb	Ii.. "$$*F% %   IbM?"CC  nT=AAA r   )r)   r*   F)F)__doc__rC   rA   r   nltk.internalsr   r   r   r   r   r   r$   r9   r;   r,   rO   rE   rF   rR   r   r   <module>rj      s:    


   % % % % % %  &  # # #    6 KP* * * *Z ;<<B B B =<B
    D D D DN  "# # # # #r   