import { UserRecord } from '../user.models';
import {
  BaseFlowTool,
  FlowFile,
  FlowStep,
  FlowTool,
} from '../fileflow.interface';
import { AzureKeyCredential } from '@azure/ai-form-recognizer';
import DocumentIntelligence, { AnalyzeBatchDocumentsDefaultResponse, AnalyzeOperationOutput } from '@azure-rest/ai-document-intelligence'
import { getLongRunningPoller, isUnexpected } from '@azure-rest/ai-document-intelligence'
import { JsonFormData } from '@cheaseed/node-utils';
import { FileflowServiceInterface } from '../fileflow.interface';
import { AZURE_DOCINTEL_SERVERS } from '../azure-constants';

/**
 * Trimmed-down representation of a single analyzed page, as produced by
 * `DocIntelTool.reformatDocIntelAnalysisOutput`.
 */
export interface DocIntelAnalysisPageInterface {
  /** Page number copied from the analysis result's page entry. */
  pageNumber: number
  /** Text content of each line reported for the page, in the order the service returned them. */
  lines: string[]
  /** Portion of the whole-document markdown attributed to this page. */
  markdown: string
}
/**
 * Trimmed-down analysis of a whole document: the full content string plus a
 * per-page breakdown. This is the shape `DocIntelTool` uploads after an analysis.
 */
export interface DocIntelAnalysisInterface {
  /** The analysis result's full `content` string (requested in markdown format). */
  documentMarkdown: string
  /** One entry per page present in the analysis result. */
  pages: DocIntelAnalysisPageInterface[]
}
/**
 * Flow tool that sends a file to Azure Document Intelligence (`prebuilt-layout`
 * model, markdown output), reduces the result to per-page lines and markdown,
 * uploads that reduced analysis and reports the number of pages consumed.
 */
export class DocIntelTool extends BaseFlowTool {

  name = 'alternate-ocr'
  description = 'Data Extraction (alternate)'
  type = 'ocr'
  /** Epoch milliseconds at which the most recent `execute` call started. */
  startTime = 0

  /** Form definition exposing the optional `pages` range parameter. */
  parameters: JsonFormData = {
    submitAlwaysEnabled: true,
    controls: [
      {
        name: 'pages',
        label: 'Page range',
        type: 'text',
        placeholder: '(e.g. 1-3,6,10)'
      }
    ]
  }

  /** REST client bound to the first entry of `AZURE_DOCINTEL_SERVERS`. */
  azureDocClient = DocumentIntelligence(AZURE_DOCINTEL_SERVERS[0].endpoint,
    new AzureKeyCredential(AZURE_DOCINTEL_SERVERS[0].key1))

  /**
   * Pre-flight check; delegates to the flow service's default check using the
   * file's page count.
   */
  async checkExecute(file: FlowFile, user: UserRecord) {
    await this.flowService.defaultCheckExecute(file, user, file.numPages)
  }

  /**
   * Analyzes `file` with the `prebuilt-layout` model and stores the reduced result.
   *
   * @param file   File to analyze; its `downloadURL` is handed to the service.
   * @param last   Previous flow step (not used by this tool).
   * @param params Optional parameters; `params.pages` overrides the default
   *               `'1-50'` page range (1-based, e.g. `1-3,6,10`).
   * @throws The service's error payload when the initial request is rejected.
   */
  async execute(
    file: FlowFile,
    last: FlowStep | null,
    params?: any) {

    this.startTime = Date.now()
    this.flowService.log('analyzing file', file, params)
    const body: any = {
      contentType: "application/json",
      body: {
        urlSource: file.downloadURL
      },
      queryParameters: {
        pages: '1-50',
        outputContentFormat: "markdown",
        // Ask for span offsets in UTF-16 code units so they can be used
        // directly with JavaScript string indexing when splitting by page.
        stringIndexType: "utf16CodeUnit"
      }
    }
    if (params?.pages) {
      body.queryParameters.pages = params.pages // Should be 1-based indexes
      this.flowService.log('updated body pages', body)
    }

    const initialResponse = await this.azureDocClient
      .path("/documentModels/{modelId}:analyze", "prebuilt-layout")
      .post(body)
    this.flowService.log('initialResponse', initialResponse)
    if (isUnexpected(initialResponse)) {
      throw initialResponse.body.error
    }
    // NOTE(review): the awaited poller value is treated as the final operation
    // response (its `body` is read below) — confirm against the installed SDK version.
    const poller:AnalyzeBatchDocumentsDefaultResponse = await getLongRunningPoller(this.azureDocClient, initialResponse)
    const analysis = poller.body as AnalyzeOperationOutput
    // JSON.stringify(undefined) is undefined, so guard before reading `.length`.
    const analysisLength = analysis.analyzeResult
      ? JSON.stringify(analysis.analyzeResult).length
      : 0
    this.flowService.log('analysis complete', file.fileName, analysisLength)
    // The output is a very large object containing bounding box info, words, lines etc per page
    // we strip it down a bit before storing

    const result = await this.reformatDocIntelAnalysisOutput(analysis)
    await this.flowService.uploadAnalysis(this, file, result)
    // Broadcast the number of pages consumed
    const numPages = result.pages?.length ?? 0
    // NOTE(review): these two calls are not awaited; if they return promises,
    // failures will not propagate from execute — confirm this is intentional.
    this.flowService.updateFile(file, { numPages })
    this.flowService.consumePages(numPages)
  }

  /**
   * Reduces a raw analysis to the full content string plus, for every page, its
   * page number, line texts and the slice of the content belonging to that page.
   *
   * Page slices are taken from the page's `spans` (offset/length into the
   * content string, as reported by the service) when they are present and fall
   * inside the content. Otherwise the method falls back to locating the page's
   * last line textually in the content, starting after the previous page.
   *
   * @param analysis Operation output whose `analyzeResult` is reduced.
   * @returns The reduced analysis; every page carries a `markdown` string
   *          (empty when nothing could be attributed to the page).
   * @throws Error `Invalid endOfPageIndex` when the textual fallback is needed
   *         and the page's last line cannot be found in the content.
   */
  async reformatDocIntelAnalysisOutput(analysis: AnalyzeOperationOutput) {
    const fullMarkdown = analysis.analyzeResult?.content ?? ''
    const result: DocIntelAnalysisInterface = {
      documentMarkdown: fullMarkdown,
      pages: []
    }

    // Position in fullMarkdown just past the previously processed page.
    let currentIndex = 0
    for (const p of analysis.analyzeResult?.pages ?? []) {
      const lines = p.lines?.map(l => l.content) ?? []
      const page: DocIntelAnalysisPageInterface = {
        pageNumber: p.pageNumber,
        lines,
        markdown: ''
      }
      result.pages.push(page)

      const spans = p.spans ?? []
      const spansUsable = spans.length > 0 && spans.every(s =>
        Number.isInteger(s.offset) && Number.isInteger(s.length) &&
        s.offset >= 0 && s.length >= 0 &&
        s.offset + s.length <= fullMarkdown.length)

      if (spansUsable) {
        // Preferred path: the service states exactly where the page sits in the
        // content, so no text matching (and no ambiguity on repeated lines) is involved.
        page.markdown = spans
          .map(s => fullMarkdown.substring(s.offset, s.offset + s.length))
          .join('')
        currentIndex = Math.max(currentIndex, ...spans.map(s => s.offset + s.length))
      } else if (lines.length > 0) {
        // Fallback: treat the first occurrence of the page's last line, searched
        // from the end of the previous page, as the end of this page.
        const endOfPageString = lines[lines.length - 1]
        const endOfPageIndex = fullMarkdown.indexOf(endOfPageString, currentIndex)
        if (endOfPageIndex === -1)
          throw new Error(`Invalid endOfPageIndex`)
        page.markdown = fullMarkdown.substring(currentIndex, endOfPageIndex) + endOfPageString
        // Skip one extra character after the matched line (presumably the
        // separator that follows it in the content).
        currentIndex = endOfPageIndex + endOfPageString.length + 1
      } else {
        // Nothing to attribute to this page; markdown stays ''.
        continue
      }
      this.flowService.log('pageMarkDown', page.markdown)
    }
    return result
  }

}
