# Copyright 2022 Bill Wendling, All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Simple Python Parser

Parse Python code into a list of logical lines, represented by LogicalLine
objects. This uses Python's tokenizer to generate the tokens. As such, YAPF must
be run with the appropriate Python version---Python >=3.7 for Python 3.7 code,
Python >=3.8 for Python 3.8 code, etc.

This parser uses Python's native "tokenize" module to generate a list of tokens
for the source code. It then uses Python's native "ast" module to assign
subtypes, calculate split penalties, etc.

A "logical line" produced by Python's "tokenizer" module ends with a
tokenize.NEWLINE, rather than a tokenize.NL, making it easy to separate them
out. Comments all end with a tokentizer.NL, so we need to make sure we don't
errantly pick up non-comment tokens when parsing comment blocks.
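
For example, tokenizing

    x = (1 +
         2)
    # a comment

yields a single tokenize.NEWLINE after the closing parenthesis (the end of one
logical line), while the break inside the parentheses and the comment's line
each end with a tokenize.NL.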

  ParseCode(): parse the code producing a list of logical lines.
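
A minimal usage sketch (assuming this module is importable as
yapf.pyparser.pyparser):

  from yapf.pyparser.pyparser import ParseCode
  logical_lines = ParseCode('x = 1')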
"""

# TODO: Call from yapf_api.FormatCode.

import ast
import os
import token
import tokenize
from io import StringIO
from tokenize import TokenInfo

from yapf.pyparser import split_penalty_visitor
from yapf.yapflib import format_token
from yapf.yapflib import logical_line

# Synthetic token type for a line-continuation backslash. The tokenize module
# emits no token for the backslash itself, so we claim an unused token value.
CONTINUATION = token.N_TOKENS


def ParseCode(unformatted_source, filename='<unknown>'):
  """Parse a string of Python code into logical lines.

  This provides an alternative entry point to YAPF.

  Arguments:
    unformatted_source: (str) The code to format.
    filename: (str) The name of the file being reformatted.

  Returns:
    A list of LogicalLines.

  Raises:
    SyntaxError: If the "ast" module cannot parse the source.
  """
  # The tokenizer expects the source to end with a newline.
  if not unformatted_source.endswith(os.linesep):
    unformatted_source += os.linesep

  ast_tree = ast.parse(unformatted_source, filename)
  ast.fix_missing_locations(ast_tree)
  readline = StringIO(unformatted_source).readline
  tokens = tokenize.generate_tokens(readline)

  logical_lines = _CreateLogicalLines(tokens)

  # Process the logical lines.
  split_penalty_visitor.SplitPenalty(logical_lines).visit(ast_tree)

  return logical_lines


def _CreateLogicalLines(tokens):
  """Separate tokens into logical lines.

  Arguments:
    tokens: (generator of tokenize.TokenInfo) Tokens generated by the
      "tokenize" module.

  Returns:
    A list of LogicalLines.
  """
  formatted_tokens = []

  # Convert tokens into "TokenInfo" and add tokens for continuation markers.
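  # For example, in
  #
  #   x = 1 + \
  #       2
  #
  # tokenize emits no token for the trailing backslash, so we synthesize a
  # CONTINUATION token between the last token of one physical line and the
  # first token of the next.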
  prev_tok = None
  for tok in tokens:
    tok = TokenInfo(*tok)

    if (prev_tok and prev_tok.line.rstrip().endswith('\\') and
        prev_tok.start[0] < tok.start[0]):
      ctok = TokenInfo(
          type=CONTINUATION,
          string='\\',
          start=(prev_tok.start[0], prev_tok.start[1] + 1),
          # The token is one backslash wide: it ends a column past its start.
          end=(prev_tok.end[0], prev_tok.start[1] + 2),
          line=prev_tok.line)
      ctok.lineno = ctok.start[0]
      ctok.column = ctok.start[1]
      ctok.value = '\\'
      formatted_tokens.append(format_token.FormatToken(ctok, 'CONTINUATION'))

    tok.lineno = tok.start[0]
    tok.column = tok.start[1]
    tok.value = tok.string
    formatted_tokens.append(
        format_token.FormatToken(tok, token.tok_name[tok.type]))
    prev_tok = tok

  # Generate logical lines.
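  # tokenize.NEWLINE ends a logical line; tokenize.NL is a non-logical break
  # (blank lines and breaks inside brackets); INDENT and DEDENT track depth.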
  logical_lines, cur_logical_line = [], []
  depth = 0
  for tok in formatted_tokens:
    if tok.type == tokenize.ENDMARKER:
      break

    if tok.type == tokenize.NEWLINE:
      # End of a logical line.
      logical_lines.append(logical_line.LogicalLine(depth, cur_logical_line))
      cur_logical_line = []
    elif tok.type == tokenize.INDENT:
      depth += 1
    elif tok.type == tokenize.DEDENT:
      depth -= 1
    elif tok.type == tokenize.NL:
      pass
    else:
      if (cur_logical_line and tok.type != tokenize.COMMENT and
          cur_logical_line[0].type == tokenize.COMMENT):
        # We were parsing a comment block, but now we have real code to worry
        # about. Store the comment and carry on.
        logical_lines.append(logical_line.LogicalLine(depth, cur_logical_line))
        cur_logical_line = []

      cur_logical_line.append(tok)

  # Link the FormatTokens in each line together to form a doubly linked list.
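  # For example, for "f(x)" the chain is f <-> ( <-> x <-> ), and "(" and ")"
  # reference each other through their matching_bracket attributes.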
  for line in logical_lines:
    previous = line.first
    bracket_stack = [previous] if previous.OpensScope() else []
    for tok in line.tokens[1:]:
      tok.previous_token = previous
      previous.next_token = tok
      previous = tok

      # Pair each closing bracket with its most recent unmatched opener via
      # the "matching_bracket" attribute.
      if tok.OpensScope():
        bracket_stack.append(tok)
      elif tok.ClosesScope():
        bracket_stack[-1].matching_bracket = tok
        tok.matching_bracket = bracket_stack.pop()

  return logical_lines
