import { PDFPage } from '../../models';
import { convertToRegExpString, getOriginalIndex, isEntireWord, normalize } from './utils';
import { TextItem } from 'pdfjs-dist/types/src/display/api';
import { uuid4 } from '@sentry/utils';

/**
 * Page visitor that searches the text of every visited page for the configured
 * query and accumulates the resulting hits.
 *
 * Each call to {@link PDFFindVisitor.visit} extracts (and stores on the page)
 * the normalized page text if the page does not have it yet, builds a regular
 * expression from the query, and converts every match into a hit expressed as
 * start/end positions relative to the page's text items. The aggregated result
 * of all visits is available through {@link PDFFindVisitor.getRunInfo}.
 */
export class PDFFindVisitor implements PDF.Page.IPageVisitor {
  /** Search parameters: a private copy of the constructor argument with the query normalized. */
  protected findParams: PDF.Find.FindParams;
  /** Never assigned by this class; kept because it is part of the protected surface. */
  protected RXquery: RegExp | null = null;
  /** Running total of hits found on all pages visited so far. */
  totalFound: number = 0;
  /** Hits per page, keyed by the numeric form of `page.id`. */
  protected hits: Record<number, PDF.Find.FindHitType[]> = {};
  /** Ids of all hits, in the order they were found. */
  protected list: PDF.Find.FindHitType['id'][] = [];
  /** Lookup table from hit id to hit. */
  protected data: Record<PDF.Find.FindHitType['id'], PDF.Find.FindHitType> = {};

  /**
   * @param params Search parameters. The object is copied, so the caller's
   *   instance is left untouched. `phraseSearch` defaults to `true` when it is
   *   `undefined`, and `query` is replaced by its normalized form.
   */
  constructor(params: PDF.Find.FindParams) {
    this.findParams = {
      ...params,
      phraseSearch: params.phraseSearch === undefined ? true : params.phraseSearch,
      query: normalize(params.query)[0],
    };
  }

  /**
   * Builds the normalized text of `page` and stores it on the page.
   * Does nothing when `page.pageTextInfo` is not `null` or when the page
   * yields no text content.
   */
  private async extractPageTextInfo(page: PDFPage) {
    if (page.pageTextInfo !== null) {
      return;
    }
    const textContent = await page.getTextContent();
    if (!textContent) {
      return;
    }

    const strBuf: string[] = [];
    for (const item of textContent.items) {
      const textItem = item as TextItem;
      strBuf.push(textItem.str);
      if (textItem.hasEOL) {
        strBuf.push('\n');
      }
    }
    // Store the normalized page content (text items) as one string.
    const [pageContents, pageDiffs, hasDiacritics] = normalize(strBuf.join(''));
    page.storeTextInformations(pageContents, pageDiffs, hasDiacritics);
  }

  /**
   * Runs `RXquery` over the normalized text of `page`, converts every match
   * into a hit and records the hits on both the page and this visitor.
   *
   * @param RXquery Global regular expression built from the query.
   * @param page Page whose `pageTextInfo` is searched.
   * @returns The hits recorded for the page, or an empty array when none are recorded.
   */
  private async calculateRegExpMatch(RXquery: RegExp, page: PDFPage) {
    if (page.pageTextInfo) {
      const { pageDiffs, pageContents } = page.pageTextInfo;
      const matches = [];
      const matchesLength = [];

      let match: RegExpExecArray | null;
      while ((match = RXquery.exec(pageContents)) !== null) {
        if (match[0].length === 0) {
          // An empty match leaves `lastIndex` where it is, which would make this
          // loop spin forever. Step over one whole code point so the position is
          // also valid when the expression carries the `u` flag.
          const codePoint = pageContents.codePointAt(match.index);
          RXquery.lastIndex =
            match.index + (codePoint !== undefined && codePoint > 0xffff ? 2 : 1);
          continue;
        }

        if (
          this.findParams.entireWord &&
          !isEntireWord(pageContents, match.index, match[0].length)
        ) {
          continue;
        }

        // Map the position in the normalized text back to the original text.
        const [matchPos, matchLen] = getOriginalIndex(pageDiffs, match.index, match[0].length);

        if (matchLen) {
          matches.push(matchPos);
          matchesLength.push(matchLen);
        }
      }

      //* CONVERT MATCHES *//
      const textContents = await page.getTextContent();
      const textItems = textContents?.items ?? [];
      const lastItem = textItems.length - 1;
      const result: PDF.Find.FindHitType[] = [];

      // `i` is the current text item and `iIndex` the offset of its first
      // character in the concatenated page text. Both only ever move forward
      // while the matches are walked in order.
      let i = 0;
      let iIndex = 0;

      // Without text items there is nothing a match could be anchored to.
      if (lastItem >= 0) {
        for (let matchIndex = 0; matchIndex < matches.length; matchIndex++) {
          let matchIdx = matches[matchIndex];

          // Advance to the text item that contains the first matched character.
          while (i !== lastItem && matchIdx >= iIndex + textItems[i].str.length) {
            iIndex += textItems[i].str.length;
            i++;
          }

          const hitStart = {
            id: textItems[i].id,
            offset: matchIdx - iIndex,
          };

          matchIdx += matchesLength[matchIndex];

          // Somewhat the same loop as above, but use > instead of >= to get
          // the end position right.
          while (i !== lastItem && matchIdx > iIndex + textItems[i].str.length) {
            iIndex += textItems[i].str.length;
            i++;
          }

          const hitEnd = {
            id: textItems[i].id,
            offset: matchIdx - iIndex,
          };
          const hit = {
            id: uuid4(),
            start: hitStart,
            end: hitEnd,
            numPage: page.getNumPage(),
          };
          result.push(hit);
          this.list.push(hit.id);
          this.data[hit.id] = hit;
        }
      }

      this.hits[+page.id] = result;
      page.setFindHits(result);
      this.totalFound += result.length;
    }

    return this.hits[+page.id] ?? [];
  }

  /**
   * Searches a single page.
   *
   * @param page Page to search.
   * @returns The hits found on the page. The result is empty when the query is
   *   empty or when the page has no text information after extraction.
   */
  async visit(page: PDFPage) {
    let query = this.findParams.query;
    if (query.length === 0) {
      // Do nothing: the matches should be wiped out already.
      return [];
    }

    await this.extractPageTextInfo(page);
    if (!page.pageTextInfo) {
      return [];
    }

    const { hasDiacritics } = page.pageTextInfo;

    let isUnicode = false;

    if (this.findParams.phraseSearch) {
      [isUnicode, query] = convertToRegExpString(query, hasDiacritics);
    } else {
      // Words are sorted in reverse order to be sure that "foobar" is matched
      // before "foo" in case the query is "foobar foo".
      const words = query.match(/\S+/g);
      if (words) {
        query = words
          .sort()
          .reverse()
          .map((word) => {
            const [isUnicodePart, queryPart] = convertToRegExpString(word, hasDiacritics);
            isUnicode ||= isUnicodePart;
            return `(${queryPart})`;
          })
          .join('|');
      }
    }

    const flags = `g${isUnicode ? 'u' : ''}${this.findParams.caseSensitive ? '' : 'i'}`;
    return this.calculateRegExpMatch(new RegExp(query, flags), page);
  }

  /**
   * @returns The aggregated result of all visits so far: total hit count, hits
   *   per page, hit ids in discovery order and the id-to-hit lookup table. The
   *   returned members are the visitor's live collections, not copies.
   */
  getRunInfo(): PDF.Find.CycleResultType {
    return {
      total: this.totalFound,
      hits: this.hits,
      list: this.list,
      data: this.data,
    };
  }
}
