import type { ArticleData, ArticleSpan, Citation } from './types';
import { findCitationIndex } from './citations';
import { TableComponent } from '@xyla/openevidence-shared-functionality/src/typewriter/types';
import { splitByBracket, expandStringToList } from './components/table';
import { endWithPeriod } from './utils';
import {
  getQuoteAttributionElements,
  PublicationFigureProps,
  PublicationQuotationProps,
} from './components/publications';

// Prefixes marking a span whose text is a serialized component rather than
// prose. The functions in this file detect them with `startsWith` and parse the
// remainder of the text (everything after the prefix) as JSON.
const TABLE_COMPONENT_PREFIX = 'REACTCOMPONENT!:!Table!:!';
const IMAGE_COMPONENT_PREFIX = 'REACTCOMPONENT!:!PublicationFigure!:!';
const QUOTE_COMPONENT_PREFIX = 'REACTCOMPONENT!:!PublicationQuotation!:!';

/**
 * Build the inline HTML marker for the reference stored at `index`.
 *
 * - Missing or falsy entry (out-of-range index, empty string): returns `'[]'`.
 * - Plain-string reference: returns that string wrapped in square brackets.
 * - Structured reference: returns an anchor pointing at the citation's `href`
 *   whose label is the 1-based reference number in square brackets.
 *
 * The anchor deliberately uses single-quoted attributes: the table generator
 * in this file injects these fragments into JSON text before parsing it, so
 * double quotes would corrupt that JSON.
 *
 * @param references - The article's reference list.
 * @param index - Zero-based position of the reference to render.
 * @returns The HTML fragment for the citation marker.
 */
function getCitationRichText(references: Citation[], index: number): string {
  const reference = references[index];

  // Covers out-of-range indexes as well as empty-string references.
  if (!reference) {
    return '[]';
  }

  if (typeof reference !== 'string') {
    const { href } = reference.metadata.citation_detail;
    return `<a href='${href}'>[${index + 1}]</a>`;
  }

  return `[${reference}]`;
}

// Matches a bracketed citation list such as "[1]", "[2-4]" or "[1, 3-5, 8]":
// numbers or number ranges separated by a comma followed by a single space.
const citationRegex = /\[\d+(?:-\d+)?(?:, \d+(?:-\d+)?)*\]/g;

/**
 * Swap citation markers that are written directly in the text (e.g. "[1, 3-5]")
 * for their rich-text equivalents. Used for table text, where citations are
 * embedded in the content instead of being attached to the span.
 *
 * The text is cut into segments with `splitByBracket`. A segment containing at
 * least one citation marker is replaced entirely by the rendered citations;
 * every other segment is copied through unchanged.
 *
 * @param text - Raw text that may contain bracketed citation markers.
 * @param references - The article's reference list; markers are 1-based
 *   positions into it.
 * @returns The text with citation markers replaced by citation rich text.
 */
function replaceInTextCitations(text: string, references: Citation[]): string {
  return splitByBracket(text).reduce((output: string, segment: string) => {
    const markers = segment?.match(citationRegex);
    if (markers == null) {
      return output + segment;
    }

    const rendered = markers.map((marker: string) =>
      expandStringToList(marker)
        // Markers are 1-based, the reference list is 0-based.
        .map((refNumber: number) =>
          getCitationRichText(references, refNumber - 1)
        )
        .join(', ')
    );

    // Several markers inside one segment are separated by a bare comma.
    return output + rendered.join(',');
  }, '');
}

/**
 * Render a serialized table component as an HTML `<table>`.
 *
 * In-text citation markers are replaced on the raw text first, and the JSON
 * payload following the table prefix is parsed afterwards. The keys of the
 * first row define the columns (and the header row); a row lacking one of
 * those keys gets an empty cell for it.
 *
 * @param tableText - Span text: the table prefix followed by the JSON payload.
 * @param references - The article's reference list, used for citation links.
 * @returns The table markup, or an empty string when the table has no rows.
 */
function generateTableRichText(
  tableText: string,
  references: Citation[]
): string {
  const linkedText = replaceInTextCitations(tableText, references);
  const payload = linkedText.substring(TABLE_COMPONENT_PREFIX.length);
  const tableData: TableComponent = JSON.parse(payload);

  const rows = tableData.table_data;
  if (rows.length === 0) {
    return '';
  }

  // The first row decides which columns exist and in what order.
  const columnNames: string[] = Object.keys(rows[0]);

  const headerRow =
    '<tr>' +
    columnNames.map((name) => '<th>' + name + '</th>').join('') +
    '</tr>';

  const bodyRows = rows.map((row) => {
    const cells = columnNames.map(
      (name) => '<td>' + (name in row ? row[name] : '') + '</td>'
    );
    return '<tr>' + cells.join('') + '</tr>';
  });

  return '<table>' + headerRow + bodyRows.join('') + '</table>';
}

/**
 * Render a serialized publication figure as rich text: an `<img>` element
 * wrapping the figure name, then the caption and the citation, each on its own
 * newline-terminated line.
 *
 * NOTE(review): `<img>` is a void element, so `<img ...>name</img>` is not
 * valid HTML — confirm what the consumers of this string expect before
 * changing the markup.
 *
 * @param imageText - Span text: the figure prefix followed by the JSON payload.
 * @returns The figure rich text.
 */
function generateImageRichText(imageText: string): string {
  const payload = imageText.substring(IMAGE_COMPONENT_PREFIX.length);
  const { url, name, caption, citation }: PublicationFigureProps =
    JSON.parse(payload);

  const lines = [
    `<img src="${url}">${name}</img>`,
    `${caption}`,
    `${citation}`,
  ];
  return lines.map((line) => line + '\n').join('');
}

/**
 * Render a serialized publication quotation as text: the quote in curly
 * quotes on the first line, followed by an em-dash attribution line.
 *
 * The attribution pieces come from `getQuoteAttributionElements`. Only the
 * author suffix is preceded by ", " here; the affiliation pieces are appended
 * as-is (presumably the helper already includes their separators — verify).
 *
 * @param quoteText - Span text: the quotation prefix followed by the JSON payload.
 * @returns The quotation with its attribution.
 */
function generateQuoteRichText(quoteText: string): string {
  const payload = quoteText.substring(QUOTE_COMPONENT_PREFIX.length);
  const quote: PublicationQuotationProps = JSON.parse(payload);

  const attribution = getQuoteAttributionElements(
    quote.full_author_list,
    quote.affiliations
  );

  const parts: string[] = [
    `“${quote.text}”\n`,
    `— ${attribution.authorString}`,
  ];
  if (attribution.authorSuffix) {
    parts.push(`, ${attribution.authorSuffix}`);
  }
  if (attribution.affilationString) {
    parts.push(`${attribution.affilationString}`);
  }
  if (attribution.affilationSuffix) {
    parts.push(`${attribution.affilationSuffix}`);
  }

  return parts.join('');
}

/**
 * Produce the rich text for one article span.
 *
 * Spans whose text starts with a component prefix (table, figure, quotation)
 * are handed to the matching generator. Any other span is emitted as its text
 * followed by one citation marker per distinct reference it cites.
 *
 * @param span - The span to render.
 * @param references - The article's reference list.
 * @returns The rich text for the span.
 */
function getArticleSpanRichText(
  span: ArticleSpan,
  references: Citation[]
): string {
  const { text } = span;

  if (text.startsWith(TABLE_COMPONENT_PREFIX)) {
    return generateTableRichText(text, references);
  }
  if (text.startsWith(IMAGE_COMPONENT_PREFIX)) {
    return generateImageRichText(text);
  }
  if (text.startsWith(QUOTE_COMPONENT_PREFIX)) {
    return generateQuoteRichText(text);
  }

  // Plain text: append each cited reference once, in first-seen order.
  const renderedIndexes = new Set<number>();
  const markers: string[] = [];
  for (const citation of span.citations) {
    const referenceIndex: number = findCitationIndex(references, citation);
    if (renderedIndexes.has(referenceIndex)) {
      continue;
    }
    renderedIndexes.add(referenceIndex);
    markers.push(getCitationRichText(references, referenceIndex));
  }

  return text + markers.join('');
}

/**
 * Build the "References" section: an `<h3>` heading followed by a numbered
 * list with one item per reference.
 *
 * A plain-string reference is listed verbatim. A structured reference is
 * listed as its linked title, then its authors string and publication info
 * string when present; each piece is passed through `endWithPeriod` and the
 * pieces are separated by single spaces.
 *
 * @param references - The article's reference list.
 * @returns The HTML for the references section.
 */
function getReferencesRichText(references: Citation[]): string {
  const items = references.map((citation) => {
    if (typeof citation === 'string') {
      return '<li>' + citation + '</li>';
    }

    const detail = citation.metadata.citation_detail;

    // The whole anchor goes through endWithPeriod, not just the title.
    let entry = endWithPeriod(
      "<a href='" + detail.href + "'>" + detail.title + '</a>'
    );
    if (detail.authors_string) {
      entry += ' ' + endWithPeriod(detail.authors_string);
    }
    if (detail.publication_info_string) {
      entry += ' ' + endWithPeriod(detail.publication_info_string);
    }

    return '<li>' + entry + '</li>';
  });

  return '<h3>References</h3>' + '<ol type="1">' + items.join('') + '</ol>';
}

/**
 * Assemble the rich text (HTML) of a whole article: every section with its
 * paragraphs and spans, followed by the references section when there is at
 * least one reference.
 *
 * @param article - The article whose sections, paragraphs and spans are rendered.
 * @param references - The article's reference list.
 * @param captureWarning - Optional callback invoked with a message when the
 *   finished output contains the literal `[]` (which, among other things, is
 *   what an unresolved citation renders as).
 * @returns The rich text for the article.
 */
export default function getArticleRichText(
  article: ArticleData,
  references: Citation[],
  captureWarning?: (message: string) => void
): string {
  let html = '';

  for (const section of article.articlesection_set) {
    if (section.section_title) {
      html += '<h3>' + section.section_title + '</h3><br>';
    } else if (html !== '') {
      // Untitled section: separate it from what came before with one extra
      // break, unless nothing has been written yet.
      html += '<br>';
    }

    for (const paragraph of section.articleparagraph_set) {
      const spans = paragraph.articlespan_set.map((span) =>
        getArticleSpanRichText(span, references)
      );
      html += spans.join('') + '<br><br>';
    }
  }

  if (references.length > 0) {
    html += getReferencesRichText(references);
  }

  if (html.includes('[]')) {
    captureWarning?.('Table did not render a complete references section.');
  }

  return html;
}

/**
 * Switches for {@link richTextToMarkdown}. Each flag makes the converter strip
 * a construct instead of translating it to Markdown. All default to `false`.
 */
interface RichTextToMarkdownOptions {
  /** Drop bold/italic tags (keeping their text) instead of emitting `**` / `*`. */
  removeFormatting?: boolean;
  /** Drop heading tags (keeping their text) instead of emitting `#` headings. */
  removeHeaders?: boolean;
  /** Drop anchor tags (keeping their text) instead of emitting `[text](url)`. */
  removeLinks?: boolean;
  /** Drop `<img>` tags entirely instead of emitting `![alt](url)`. */
  removeImages?: boolean;
}

/**
 * Simple regex-based HTML to Markdown converter.
 * Not using a library to avoid overhead since the rich text isn't too complex,
 * but we may want to switch if we need more features.
 *
 * Conversions run in this order: line breaks, bold/italic, headings,
 * paragraphs, links, unordered lists, ordered lists, images. Whatever tags are
 * left are then stripped, common HTML entities are decoded, and the result is
 * trimmed.
 *
 * Limitations: only attribute-free formatting/heading/paragraph/list-item tags
 * are recognised; anchors must have `href` as their only attribute; images
 * must be written as `<img src="..." alt="...">` in that attribute order.
 *
 * @param richText - HTML rich text to convert.
 * @param options - Which constructs to strip rather than convert.
 * @returns The Markdown text.
 */
export function richTextToMarkdown(
  richText: string,
  options: RichTextToMarkdownOptions = {}
): string {
  const {
    removeFormatting = false,
    removeHeaders = false,
    removeLinks = false,
    removeImages = false,
  } = options;

  // <br> becomes a Markdown hard line break (two trailing spaces + newline).
  let markdown = richText.replace(/<br\s*\/?>/gi, '  \n');

  if (!removeFormatting) {
    // <strong>/<b> -> **bold**, <em>/<i> -> *italic*
    markdown = markdown.replace(/<(strong|b)>(.*?)<\/\1>/gi, '**$2**');
    markdown = markdown.replace(/<(em|i)>(.*?)<\/\1>/gi, '*$2*');
  } else {
    // Remove bold and italic tags but keep the inner text
    markdown = markdown.replace(/<\/?(strong|b|em|i)>/gi, '');
  }

  if (!removeHeaders) {
    // <h1>..<h6> -> the matching number of leading '#'
    markdown = markdown.replace(
      /<h([1-6])>(.*?)<\/h\1>/gi,
      (_match: string, level: string, content: string) =>
        `${'#'.repeat(Number(level))} ${content}\n\n`
    );
  } else {
    // Remove header tags but keep the inner text and add a blank line
    markdown = markdown.replace(/<h[1-6]>([\s\S]*?)<\/h[1-6]>/gi, '$1\n\n');
  }

  // <p>text</p> -> text followed by a blank line
  markdown = markdown.replace(/<p>(.*?)<\/p>/gi, '$1\n\n');

  if (!removeLinks) {
    // <a href="url">text</a> or <a href='url'>text</a> -> [text](url).
    // Both quote styles must be accepted: the rich text generated in this file
    // writes its anchors with single quotes.
    markdown = markdown.replace(
      /<a\s+href=(?:"([^"]+)"|'([^']+)')>(.*?)<\/a>/gi,
      (
        _match: string,
        doubleQuoted: string | undefined,
        singleQuoted: string | undefined,
        text: string
      ) => `[${text}](${doubleQuoted ?? singleQuoted})`
    );
  } else {
    // Remove opening link tags here; the closing </a> is removed by the
    // generic tag stripping further down, so the inner text is kept.
    markdown = markdown.replace(/<\/?a\s+[^>]+>/gi, '');
  }

  // <ul><li>item</li></ul> -> "- item" lines
  markdown = markdown.replace(
    /<ul[^>]*>([\s\S]*?)<\/ul>/gi,
    (_match: string, content: string) =>
      content.replace(/<li>(.*?)<\/li>/gi, '- $1\n') + '\n'
  );

  // <ol><li>item</li></ol> -> "1. item" lines, numbered per list
  markdown = markdown.replace(
    /<ol[^>]*>([\s\S]*?)<\/ol>/gi,
    (_match: string, content: string) => {
      let index = 1;
      return (
        content.replace(
          /<li>(.*?)<\/li>/gi,
          (_item: string, itemContent: string) => `${index++}. ${itemContent}\n`
        ) + '\n'
      );
    }
  );

  if (!removeImages) {
    // <img src="url" alt="alt"> -> ![alt](url)
    markdown = markdown.replace(
      /<img\s+src="([^"]+)"\s+alt="([^"]*)"\s*\/?>/gi,
      '![$2]($1)'
    );
  } else {
    // Remove image tags completely
    markdown = markdown.replace(/<img\s+[^>]*\/?>/gi, '');
  }

  // Remove any remaining HTML tags. A tag must start with a letter (or '!' for
  // comments/doctype) right after '<' or '</', so a literal less-than sign in
  // prose such as "p < 0.05" is not mistaken for a tag and deleted.
  markdown = markdown.replace(/<\/?[a-zA-Z!][^>]*(?:>|$)/g, '');

  // Decode common HTML entities. '&amp;' goes last so that an escaped entity
  // such as '&amp;lt;' decodes once to '&lt;' rather than twice to '<'.
  markdown = markdown
    .replace(/&nbsp;/g, ' ')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&amp;/g, '&');

  return markdown.trim();
}

/**
 * Render an entire article as Markdown: the article is first rendered to rich
 * text with `getArticleRichText`, and that HTML is then converted with
 * `richTextToMarkdown`. No warning callback is passed to the rich-text step.
 *
 * @param article - The article to render.
 * @param references - The article's reference list.
 * @param options - Conversion switches forwarded to `richTextToMarkdown`.
 * @returns The Markdown for the article.
 */
export function getArticleMarkdown(
  article: ArticleData,
  references: Citation[],
  options: RichTextToMarkdownOptions = {}
): string {
  return richTextToMarkdown(getArticleRichText(article, references), options);
}
