import { XmlDocument, type XmlElement, type XmlNode } from 'xmldoc';
import { range, setsHaveSameContents } from '../utils/collections';
import { throwError } from '../utils/flowControl';
import {
  type default as AST,
  type AllowedInSpanContext,
  type FracToken,
  isAllowedInSpanContext
} from './AST';

/**
 * Parse a markup string.
 *
 * # Definition of markup language
 *
 * This follows XML syntax. All tags must be closed, and all attributes must be strings.
 * Some characters are reserved and must be replaced with escape sequences. E.g. < should be written with "&lt;".
 * See https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references#List_of_character_entity_references_in_HTML
 * for all escape sequences.
 *
 * Note that there's a concept of "span context", where the nodes are rendered as children
 * of a react-native <Text> component, using text-wrap functionality (like display: inline in CSS).
 * You can technically put any component in this context, but it's inconsistent between platforms (iOS and Android and
 * web) so we avoid putting anything in it other than text.
 *
 * Line-wrapping: When using non-text tags, any tags not separated by whitespace will be **automatically** grouped with
 * the <g> tag at the rendering stage. This prevents them from getting linebreaks put between them. This behaviour can
 * be turned off with rendering options.
 *
 * We use the following tags:
 *
 * - text (no tag) - Just some text. Must be used inside span context.
 * - <b> - The enclosed text is modified to be bold. Contents are in span context.
 * - <i> - The enclosed text is modified to be italics. Contents are in span context.
 * - <br> - Inserts a line break. When used inside span context, this becomes the \n character.
 * - <ans> - An answer box. Must not be used inside span context.
 *   - attrs:
 *     - i (optional) - Index indicating which string from the user answer array to use as state.
 *       Defaults to next available index.
 * - <frac> - A mixed fraction. Must not be used inside span context.
 *   - attrs:
 *     - w (optional) - String contents of the "whole number" part of the mixed fraction.
 *     - n - String contents of the "numerator" part of the mixed fraction.
 *     - d - String contents of the "denominator" part of the mixed fraction.
 *   - all attributes can be given as wAns, nAns or dAns instead. This is like placing an <ans i='?'/> tag in that
 *     position, where ? is the value given. If ? is the empty string, this is like not specifying the i attribute.
 *   - all attributes can be given as children, using the <w>, <n> and <d> tags instead. This allows you to insert
 *     markup in these positions, rather than just strings. If that markup represents just text, it's placed in span
 *     context. Otherwise, it should be a single node.
 * - <asset> - A JSX element. Must not be used inside span context. Must be used in conjunction with the MarkupAssets.
 *   - attrs:
 *     - name - String index of the asset to use from the MarkupAssets
 * - <g> - Wrap other tokens or text in this tag to have them render together as a group. Must not be used inside
 *   span context.
 *
 * See also {@link AST} for an idea of what the markup is parsed into, and therefore what is possible.
 *
 * ## Example of valid markup
 *
 * ```xml
 * 2 × 3 = <ans i='0'/><br/>2 = <frac nAns='1'><d><b>3</b></d><br/></frac><br/>Is <frac n='2' d='3'/> &lt; 2 true?<br/>Your answer: <ans i='2'/>
 * ```
 */
export default function parseMarkup(input: string): { tokens: AST[]; numberOfAns: number } {
  // Markup may consist of several top-level nodes, but XML requires a single root. Parse the input inside a
  // throwaway <outer> element and only ever look at that element's children.
  const root = new XmlDocument(`<outer>${input}</outer>`);

  // Every answer index claimed so far. Explicit indices (e.g. <ans i='2'/>) are recorded while parsing;
  // automatically assigned ones are recorded during the second pass below.
  const usedAnswerIndices = new Set<number>();

  // First pass: convert each XML node into its AST token. Answers without an explicit index get -1 here.
  const tokens: AST[] = [];
  for (const child of root.children) {
    tokens.push(parseNode(child, usedAnswerIndices));
  }

  // Second pass: swap every placeholder -1 for the next free index.
  for (const token of tokens) {
    updateIndex(token, usedAnswerIndices);
  }

  // Lastly, verify the claimed indices against the set built from range(0, count - 1).
  // With no answers at all there is nothing to check.
  const numberOfAns = usedAnswerIndices.size;
  if (numberOfAns !== 0) {
    const expectedIndices = new Set(range(0, numberOfAns - 1));
    if (!setsHaveSameContents(usedAnswerIndices, expectedIndices)) {
      throw Error('Invalid markup - <ans> tags need to be given indices with no gaps');
    }
  }

  return { tokens, numberOfAns };
}

/**
 * Parse a single XML node (and, recursively, its children) into an AST token.
 *
 * @param node - The XML node to convert.
 * @param usedAnswerIndices - Set of answer indices explicitly claimed by the markup. Mutated: any explicit
 *   `<ans i='…'/>` index encountered is added. Answers without an explicit index are NOT added here; they are
 *   returned with the placeholder index -1.
 * @returns The AST token for this node.
 * @throws Error if the node is CDATA or a comment, is an unrecognized tag, is a `<b>`/`<i>` containing children
 *   not allowed in span context, is an `<ans>` whose `i` attribute is not a non-negative integer, or is an
 *   `<asset>` without a `name` attribute.
 */
function parseNode(node: XmlNode, usedAnswerIndices: Set<number>): AST {
  switch (node.type) {
    case 'cdata':
    case 'comment':
      return throwError('Invalid markup - saw CDATA or comment tags.');

    case 'text':
      return { type: 'text', value: node.text };

    case 'element': {
      switch (node.name) {
        case 'b':
        case 'i': {
          // Contents of <b>/<i> are in span context, so every child must be allowed there.
          const children = node.children.map(child => parseNode(child, usedAnswerIndices));
          const childrenAllowedInSpanContext = children.filter(isAllowedInSpanContext);
          if (children.length !== childrenAllowedInSpanContext.length) {
            throw Error(`Invalid markup - saw illegal children of tag <${node.name}>`);
          }
          return {
            type: 'span',
            children: childrenAllowedInSpanContext,
            styleModifiers: node.name === 'b' ? 'bold' : 'italic'
          };
        }
        case 'br':
          return { type: 'br' };
        case 'ans': {
          const indexFromAttrs: string | undefined = node.attr.i;
          if (indexFromAttrs === undefined) {
            // No explicit index. Use the -1 placeholder; a real index is assigned in a later pass.
            return { type: 'ans', index: -1 };
          }

          // Validate here rather than letting NaN or a negative value reach the set of used indices, where it
          // would only surface later as a confusing "no gaps" error. Negative values are also rejected because
          // -1 is reserved as the "not yet assigned" placeholder.
          const index = parseInt(indexFromAttrs, 10);
          if (!Number.isInteger(index) || index < 0) {
            throw Error(`Invalid markup - <ans> has invalid index "${indexFromAttrs}"`);
          }
          usedAnswerIndices.add(index);
          return { type: 'ans', index };
        }
        case 'frac': {
          return parseFraction(node, usedAnswerIndices);
        }
        case 'asset': {
          // The name attribute is required; fail at parse time instead of producing a token with no name.
          const name: string | undefined = node.attr.name;
          if (name === undefined) {
            throw Error('Invalid markup - <asset> missing name attribute');
          }
          return { type: 'asset', name };
        }
        case 'g': {
          return {
            type: 'group',
            children: node.children.map(child => parseNode(child, usedAnswerIndices))
          };
        }
        default:
          return throwError(`Invalid markup - unrecognized tag ${node.name}.`);
      }
    }
  }
}

/**
 * Parse a `<frac>` element into a fraction token.
 *
 * Each of the three parts (whole `w`, numerator `n`, denominator `d`) is looked up in this order:
 *  1. a child element named `w`/`n`/`d`, whose contents are parsed as markup;
 *  2. a string attribute `w`/`n`/`d`, which becomes a single text token;
 *  3. an attribute `wAns`/`nAns`/`dAns`, which becomes a single answer token.
 *
 * @param node - The `<frac>` element.
 * @param usedAnswerIndices - Set of answer indices explicitly claimed by the markup. Mutated: explicit indices
 *   found in `*Ans` attributes (and in any nested markup) are added.
 * @returns The fraction token. `whole` is undefined when no whole part was given.
 * @throws Error if the numerator or denominator is missing, if a child part has several children that are not
 *   all allowed in span context, or if a `*Ans` attribute is neither empty nor an integer >= -1.
 */
function parseFraction(node: XmlElement, usedAnswerIndices: Set<number>): FracToken {
  const getProp = (tag: string): [AST] | AllowedInSpanContext[] | undefined => {
    // 1. Child element, e.g. <n>…</n>.
    const fromChildren = node.childNamed(tag);
    if (fromChildren !== undefined) {
      const children = fromChildren.children.map(child => parseNode(child, usedAnswerIndices));
      if (children.length === 1) {
        // A single child may be any kind of node.
        return [children[0]];
      }

      // Otherwise the contents go into span context, so every child must be allowed there.
      const childrenAllowedInSpanContext = children.filter(isAllowedInSpanContext);
      if (children.length !== childrenAllowedInSpanContext.length) {
        throw Error(`Invalid markup - saw multiple non-span children of <frac>'s "${tag}"`);
      }
      return childrenAllowedInSpanContext;
    }

    // 2. Plain string attribute, e.g. n='2'.
    const fromAttr: string | undefined = node.attr[tag];
    if (fromAttr !== undefined) {
      return [{ type: 'text', value: fromAttr }];
    }

    // 3. Answer-box attribute, e.g. nAns='1'. The empty string (or -1) means "assign the next free index later".
    const fromAttrAns: string | undefined = node.attr[tag + 'Ans'];
    if (fromAttrAns !== undefined) {
      if (fromAttrAns === '') {
        return [{ type: 'ans', index: -1 }];
      }

      // Reject anything that is not an integer >= -1 here, so that NaN or other negatives never reach the set of
      // used indices and only show up later as a confusing "no gaps" error.
      const index = parseInt(fromAttrAns, 10);
      if (!Number.isInteger(index) || index < -1) {
        throw Error(`Invalid markup - <frac> has invalid ${tag}Ans index "${fromAttrAns}"`);
      }
      if (index !== -1) {
        usedAnswerIndices.add(index);
      }
      return [{ type: 'ans', index }];
    }

    return undefined;
  };

  return {
    type: 'frac' as const,
    whole: getProp('w'),
    numerator: getProp('n') ?? throwError('Invalid markup - <frac> missing numerator'),
    denominator: getProp('d') ?? throwError('Invalid markup - <frac> missing denominator')
  };
}

/**
 * Walk a token tree and give every answer token that still carries the placeholder index -1 the next free
 * index. Tokens are mutated in place, and each newly assigned index is recorded in `usedAnswerIndices`.
 */
function updateIndex(token: AST, usedAnswerIndices: Set<number>) {
  // Applies this same pass to each token of a list, in order.
  const visitAll = (tokens: readonly AST[]): void => {
    for (const inner of tokens) {
      updateIndex(inner, usedAnswerIndices);
    }
  };

  switch (token.type) {
    // Leaf tokens that can never contain an answer.
    case 'text':
    case 'br':
    case 'asset':
      return;

    case 'ans':
      if (token.index === -1) {
        token.index = nextAvailableIndex(usedAnswerIndices);
      }
      return;

    case 'span':
      visitAll(token.children);
      return;

    case 'group':
      visitAll(token.children);
      return;

    case 'frac':
      // Visit whole, then numerator, then denominator; the whole part may be absent.
      visitAll(token.whole ?? []);
      visitAll(token.numerator);
      visitAll(token.denominator);
      return;

    default:
      // Produces TS error and throws runtime error if we missed a case
      throw new Error(`Logic error: unreachable (${token satisfies never})`);
  }
}

/**
 * Find the smallest non-negative integer not yet present in `usedAnswerIndices`, record it in the set as
 * used, and return it.
 */
function nextAvailableIndex(usedAnswerIndices: Set<number>): number {
  for (let candidate = 0; ; candidate++) {
    if (!usedAnswerIndices.has(candidate)) {
      usedAnswerIndices.add(candidate);
      return candidate;
    }
  }
}
