import {
  AgentType,
  BaseFlowTool,
  FlowFile,
  FlowStep,
} from '../fileflow.interface'
import { defaultTransformerParameters, executeAzureTool, NON_PRINTABLE_REGEX } from '../utils'
import { DocuPandaTool } from './DocuPandaTool'
import { DocuPandaAsImageTool } from './DocuPandaAsImageTool'
import { DocuPandaRemoveWatermarkTool } from './DocuPandaRemoveWatermarkTool'
import { FileflowServiceInterface } from '../fileflow.interface'
import { CHATGPT_COSTS, JsonFormData } from '@cheaseed/node-utils'
import { generateDocupandaPageOutputPreview } from '../docupanda-utils';
import { UNDERSCORE } from '../fidoc-constants'

/**
 * Flow tool that walks the pages of the previous step's output, sends the rows
 * of every `table` section to an Azure completion model for indentation
 * correction, and uploads the updated document as this step's analysis.
 *
 * Registered precedents: DocuPandaTool, DocuPandaAsImageTool and
 * DocuPandaRemoveWatermarkTool.
 */
export class DocuPandaJsonIndenterTool extends BaseFlowTool {

  name = 'json-indenter'
  description = 'Data Validation and Formatting (standard)'
  type = 'transformer'
  apiType = AgentType.completions
  outputType = 'json'
  assistantId = 'asst_kxIxCOBgOqaHr2C9FxP6G8xP'
  instructions = undefined
  parameters: JsonFormData = defaultTransformerParameters
  // Timestamp (ms) captured when execute() starts; used for elapsedMsec logging.
  startTime = 0

  /**
   * Runs the base initialization and registers the tools whose output this
   * tool can follow.
   *
   * @param flowService service handed to the base class
   * @returns this instance
   */
  protected override initialize(flowService: FileflowServiceInterface) {
    super.initialize(flowService)
    this.precedents.push(
      new DocuPandaTool(this.flowService),
      new DocuPandaAsImageTool(this.flowService),
      new DocuPandaRemoveWatermarkTool(this.flowService)
    )
    return this
  }

  /**
   * Corrects the indentation of every table found in the last step's output
   * and uploads the modified output.
   *
   * @param file   file being processed
   * @param last   previous flow step whose stored output is read
   * @param params optional overrides; `instructions`, `model` and `production`
   *               are read when present
   * @throws Error when `last` is null or its stored contents are empty
   */
  async execute(
      file: FlowFile,
      last: FlowStep | null,
      params?: any)
  {
    this.startTime = Date.now()

    // Get the output from the last step
    const output = last ? await this.flowService.getFileContents(last.storageName) : null
    if (!output)
      throw new Error(`No output found in last step ${last?.name} ${last?.outputURL}`)

    const instructions = params?.instructions || await this.flowService.getToolInstructions(this)
    const model = params?.model || 'gpt-4o'

    // Correct the indentation of the tables in the output
    const pages = output.data.result.pages
    for (let i = 0; i < pages?.length; i++) {
      // A page without sections has no tables to fix; skip it instead of throwing.
      const sections = pages[i].sections ?? []
      for (let j = 0; j < sections.length; j++) {
        if (sections[j].type !== 'table')
          continue
        this.flowService.log(`Found table in page ${i} section ${j}`)
        const tableRows = sections[j].tableList
        //const start = getTableStartRow(tableRows); //check for blank rows; remove if present
        //tableRows = start > 0 ? tableRows.slice(start) : tableRows;
        //this.flowService.log('Table Start Row is ', start);
        const cleanRows = this.cleanArray(tableRows)
        // params is optional, so it must be read with optional chaining here too;
        // a bare `params.production` throws when execute() is called without params.
        const fixedRows = await this.correctDocupandaIndentation(
          file,
          JSON.stringify(cleanRows),
          instructions,
          model,
          params?.production
        )
        // replace the tableList with the corrected rows
        sections[j].tableList = fixedRows
      }
    }
    // Update the FlowStep
    await this.flowService.uploadAnalysis(this, file, output)
  }

  /**
   * Returns a copy of `rows` in which every cell equal to UNDERSCORE becomes an
   * empty string and every other cell has NON_PRINTABLE_REGEX matches removed.
   * The input array is not modified.
   *
   * @param rows table rows, each a list of cell strings
   * @returns a new array of cleaned rows
   */
  cleanArray(rows: string[][]): string[][] {
    return rows.map(row =>
      row.map((cell: string) => {
        // 12/15/24 - Docupanda is adding an underscore in cells with no values
        // Remove it
        if (cell === UNDERSCORE)
          return ''
        return cell.replaceAll(NON_PRINTABLE_REGEX, '')
      })
    )
  }

  /**
   * Sends one serialized table to the Azure tool, records the prompt and a
   * pipeline-step log entry (tokens, cost, elapsed time), and returns the
   * `output` property of the parsed model response.
   *
   * @param file         file being processed (used for logging and prompt storage)
   * @param tableList    serialized table rows sent as the user message
   * @param instructions instructions passed to the Azure tool
   * @param model        model name; also the key used to look up CHATGPT_COSTS
   * @param isProduction forwarded unchanged to executeAzureTool
   * @returns the `output` field of the JSON-parsed result
   * @throws SyntaxError when the tool result is not valid JSON
   */
  private async correctDocupandaIndentation(file: FlowFile, tableList: any, instructions: string, model: string, isProduction: boolean) {
    const messages = [
      {
        role: 'user',
        content: `${tableList}`,
      }
    ]
    const { usage, result } = await executeAzureTool(
      this,
      messages,
      this.flowService,
      { instructions, model, isProduction }
    )

    const actual_output = JSON.parse(result).output
    this.flowService.log('Output: ', { instructions, result, actual_output })

    const userId = this.flowService.getUserId()
    const user = await this.flowService.getUser(userId)
    const prompt = await this.flowService.updateStepPrompt(file, this.name, tableList, JSON.stringify(actual_output))
    // Cost rates are applied per 1000 tokens; models missing from the table are logged at 0.
    const costs = CHATGPT_COSTS[model]
    const azureCost = costs
      ? (usage.prompt_tokens * costs.input / 1000) + (usage.completion_tokens * costs.output / 1000)
      : 0
    await this.flowService.logPipelineStep({
      user: userId,
      groupDocId: user.groupDocId,
      fileDocId: file.docId,
      fileName: file.fileName,
      loggedAt: new Date(),
      fileSize: file.size,
      stepName: this.name,
      promptDocId: prompt.id,
      azureModelUsed: model,
      input_tokens: usage.prompt_tokens,
      output_tokens: usage.completion_tokens,
      // azureTokensUsed: usage.total_tokens, // deprecated
      azureCost,
      // Measured from the start of execute(), so it accumulates across tables in one run.
      elapsedMsec: Date.now() - this.startTime
    })
    return actual_output
  }

  /**
   * Builds the preview for this step's output by delegating to
   * generateDocupandaPageOutputPreview.
   *
   * @param output the step output to preview
   */
  generateOutputPreview(output: any) {
    return generateDocupandaPageOutputPreview(output)
  }
}
