import {
  AgentType,
  BaseFlowTool,
  FlowFile,
  FlowStep,
  FlowTool,
} from '../fileflow.interface'
import { defaultTransformerParameters, executeAzureTool, NON_PRINTABLE_REGEX } from '../utils'
import { DocuPandaTool } from './DocuPandaTool'
import { DocuPandaAsImageTool } from './DocuPandaAsImageTool'
import { DocuPandaRemoveWatermarkTool } from './DocuPandaRemoveWatermarkTool'
import { FileflowServiceInterface } from '../fileflow.interface'
import { JsonFormData } from '@cheaseed/node-utils'

/**
 * Default prompt used as the `instructions` of DocuPandaJsonIndenterTool.
 *
 * The text has four parts: a description of the input (a JSON list of string
 * arrays, first row = column header, first cell of each later row = row
 * header), the clean-up / indentation rules, the required response shape (a
 * JSON object with an "output" property), and one worked example whose
 * expected output is shown inside a json code fence.
 *
 * Everything between the backticks is runtime text sent to the model;
 * whitespace and wording are part of the prompt, so do not reformat it.
 */
const INSTRUCTIONS = `
### Input 
- Your input is a JSON list of arrays of strings.
- Each element of the list represents a row of cells in a table of financial data. Rows can have all cells with values, some cells with no values or all cells with no values
- The first row is the column header which can contain all empty cell values
- The first element of each row following the column header is called the row header.

### Instructions
- Please follow these instructions precisely
- The column header in the input should never be modified and should appear as is in the output
- Examine the table rows and indent the row header string by two additional spaces if you think the row is a sub-type of the previous row.
- When done with all rows, make sure the row header indentation is correct for the entire hierarchy of rows.
- If a '$' is followed by a number but separated by spaces, consider the '$' as belonging to the column containing the number. If a trailing '$' is found  in a row value, remove it
- If a column has a value of $ and the next immediate column has a number, concatenate the number with the $ sign in the first column
- Do not remove a row if all its values are empty
- Do NOT remove empty strings from the column header
- If an entire column of the table is empty, remove the column
- Make sure a column does not have multiple values
- A cell cannot have a value like '0000 0'
### Output
- The response should be a valid JSON object that contains an "output" property, which is a list of arrays of strings. 
- Ensure the output can be parsed into a valid Javascript JSON object

Use the following example input and expected output as a reference
### Example Input
[
                [
                  "A.2 Commitment Reconciliation:",
                  "LP #5's Allocation of Total Fund",
                  "",
                  "",
                  "Total Fund (incl. GP Allocation)",
                  "",
                  "",
                  "GP's Allocation of Total Fund",
                  "",
                  ""
                ],
                [
                  "Total Commitment",
                  "$ 50,000,000",
                  "$ 50,000,000",
                  "$ 50,000,000",
                  "$ 2,503,750,000",
                  "$ 2,503,750,000",
                  "$ 2,503,750,000",
                  "$ 3,750,000",
                  "$ 3,750,000",
                  "$ 3,750,000"
                ],
                [
                  "Beginning Unfunded Commitment:",
                  "$ 18,500,000",
                  "$ 23,500,000",
                  "$ 50,000,000",
                  "$ 926,387,500",
                  "$ 1,176,762,500",
                  "$ 2,503,750,000",
                  "1,387,500",
                  "1,762,500",
                  "3,750,000"
                ],
                [
                  "(Less Contributions)",
                  "0",
                  "(5,000,000)",
                  "(35,000,000)",
                  "0",
                  "(250,375,000)",
                  "(1,752,625,000)",
                  "0",
                  "(375,000)",
                  "(2,625,000)"
                ],
                [
                  "Plus Recallable Distributions",
                  "0",
                  "0",
                  "4,000,000",
                  "0",
                  "0",
                  "200,300,000",
                  "0",
                  "0",
                  "300,000"
                ],
                [
                  "(Less Expired/Released Commitments)",
                  "0",
                  "0",
                  "0",
                  "0",
                  "0",
                  "0",
                  "0",
                  "0",
                  "0"
                ],
                [
                  "+/- Other Unfunded Adjustment",
                  "0",
                  "0",
                  "(500,000)",
                  "0",
                  "0",
                  "(25,037,500)",
                  "0",
                  "0",
                  "(37,500)"
                ],
                [
                  "Ending Unfunded Commitment",
                  "$ 18,500,000",
                  "$ 18,500,000",
                  "$ 18,500,000",
                  "$ 926,387,500",
                  "$ 926,387,500",
                  "$ 926,387,500",
                  "$ 1,387,500",
                  "$ 1,387,500",
                  "$ 1,387,500"
                ]
              ]
### Example Output
\`\`\`json
{
  "output": [
                [
                  "A.2 Commitment Reconciliation:",
                  "LP #5's Allocation of Total Fund",
                  "",
                  "",
                  "Total Fund (incl. GP Allocation)",
                  "",
                  "",
                  "GP's Allocation of Total Fund",
                  "",
                  ""
                ],
                [
                  "Total Commitment",
                  "$ 50,000,000",
                  "$ 50,000,000",
                  "$ 50,000,000",
                  "$ 2,503,750,000",
                  "$ 2,503,750,000",
                  "$ 2,503,750,000",
                  "$ 3,750,000",
                  "$ 3,750,000",
                  "$ 3,750,000"
                ],
                [
                  "Beginning Unfunded Commitment:",
                  "$ 18,500,000",
                  "$ 23,500,000",
                  "$ 50,000,000",
                  "$ 926,387,500",
                  "$ 1,176,762,500",
                  "$ 2,503,750,000",
                  "1,387,500",
                  "1,762,500",
                  "3,750,000"
                ],
                [
                  "  (Less Contributions)",
                  "0",
                  "(5,000,000)",
                  "(35,000,000)",
                  "0",
                  "(250,375,000)",
                  "(1,752,625,000)",
                  "0",
                  "(375,000)",
                  "(2,625,000)"
                ],
                [
                  "  Plus Recallable Distributions",
                  "0",
                  "0",
                  "4,000,000",
                  "0",
                  "0",
                  "200,300,000",
                  "0",
                  "0",
                  "300,000"
                ],
                [
                  "  (Less Expired/Released Commitments)",
                  "0",
                  "0",
                  "0",
                  "0",
                  "0",
                  "0",
                  "0",
                  "0",
                  "0"
                ],
                [
                  "  +/- Other Unfunded Adjustment",
                  "0",
                  "0",
                  "(500,000)",
                  "0",
                  "0",
                  "(25,037,500)",
                  "0",
                  "0",
                  "(37,500)"
                ],
                [
                  "Ending Unfunded Commitment",
                  "$ 18,500,000",
                  "$ 18,500,000",
                  "$ 18,500,000",
                  "$ 926,387,500",
                  "$ 926,387,500",
                  "$ 926,387,500",
                  "$ 1,387,500",
                  "$ 1,387,500",
                  "$ 1,387,500"
                ]
              ]
}
\`\`\`
`

/**
 * Transformer tool that post-processes the previous step's output: every
 * section of type 'table' has its rows cleaned of non-printable characters,
 * sent to the model together with the instructions, and replaced by the rows
 * the model returns. The modified document is then handed to
 * `flowService.uploadAnalysis`.
 *
 * The document is read as `output.data.result.pages[].sections[]`, with the
 * table rows stored in each section's `tableList`.
 */
export class DocuPandaJsonIndenterTool extends BaseFlowTool {

  name = 'json-indenter'
  description = 'Data Validation and Formatting (standard)'
  // Filled in by initialize(), because the precedent tools are built with the flow service.
  precedents: FlowTool[] = []
  type = 'transformer'
  apiType = AgentType.completions
  outputType = 'json'
  assistantId = 'asst_kxIxCOBgOqaHr2C9FxP6G8xP'
  instructions = INSTRUCTIONS
  parameters: JsonFormData
  startTime = 0

  /**
   * @param flowService When provided, the tool is initialized immediately;
   *   otherwise `initialize` must be called before the tool is executed.
   */
  constructor(flowService?: FileflowServiceInterface) {
    super()
    if (flowService)
      this.initialize(flowService)
  }

  /**
   * Binds the flow service, sets the default transformer parameters and
   * builds the list of precedent tools.
   *
   * The precedent list is assigned rather than appended to, so calling this
   * method more than once does not accumulate duplicate precedents.
   *
   * @returns this tool, to allow chaining.
   */
  initialize(flowService: FileflowServiceInterface) {
    this.flowService = flowService
    this.parameters = defaultTransformerParameters
    this.precedents = [
      new DocuPandaTool(this.flowService),
      new DocuPandaAsImageTool(this.flowService),
      new DocuPandaRemoveWatermarkTool(this.flowService),
    ]
    return this
  }

  /** This tool always reports that it should be published. */
  shouldPublish(): boolean {
    return true
  }

  /**
   * Runs the indentation fix over every table in the last step's output.
   *
   * @param file   The flow file being processed.
   * @param last   The previous step; its stored contents are the input.
   * @param params Optional run parameters. Fields read here:
   *   `instructions` (overrides the tool's instructions for this run),
   *   `saveGlobally` (also stores the override on the tool via
   *   `flowService.updateTool`), and `production` (forwarded to
   *   `executeAzureTool`).
   * @throws Error when there is no previous step or it has no contents, or
   *   when the model response has no "output" array.
   */
  async execute(
      file: FlowFile,
      last: FlowStep | null,
      params?: any): Promise<void>
  {
    this.startTime = Date.now()

    // Get the output from the last step
    const output = last ? await this.flowService.getFileContents(last.storageName) : null
    if (!output)
      throw new Error(`No output found in last step ${last?.name} ${last?.outputURL}`)

    // Only persist an override that actually exists; saving `undefined` would
    // wipe the tool's instructions.
    if (params?.saveGlobally && params.instructions) {
      this.instructions = params.instructions
      // NOTE(review): the result of updateTool is not awaited here (as before) — confirm
      // whether it returns a promise that should be awaited.
      this.flowService.updateTool(this, { instructions: params.instructions })
    }
    const instructions = params?.instructions || this.instructions

    // Correct the indentation of the tables in the output.
    // Missing or malformed containers are treated as "no tables" instead of throwing.
    const rawPages = output.data?.result?.pages
    const pages: any[] = Array.isArray(rawPages) ? rawPages : []
    for (const [pageIndex, page] of pages.entries()) {
      const sections: any[] = Array.isArray(page?.sections) ? page.sections : []
      for (const [sectionIndex, section] of sections.entries()) {
        if (section?.type !== 'table')
          continue
        this.flowService.log(`Found table in page ${pageIndex} section ${sectionIndex}`)

        const tableRows = section.tableList
        if (!Array.isArray(tableRows) || tableRows.length === 0) {
          // Nothing to indent; avoid a model round-trip for an empty table.
          this.flowService.log(`Skipping empty table in page ${pageIndex} section ${sectionIndex}`)
          continue
        }

        const cleanRows = this.cleanArray(tableRows)
        // Tables are processed one at a time, in document order.
        // `params` is optional, so `production` must be read with `?.`.
        section.tableList = await this.correctDocupandaIndentation(
          file,
          JSON.stringify(cleanRows),
          instructions,
          params?.production
        )
      }
    }
    // Update the FlowStep
    await this.flowService.uploadAnalysis(this, file, output)
  }

  /**
   * Returns a copy of `rows` in which every cell has the characters matched
   * by NON_PRINTABLE_REGEX removed. The input is not mutated.
   *
   * Cells that are null/undefined become '' and other non-string cells are
   * converted with String(), so the result is always a table of strings.
   * A null/undefined `rows`, or a row that is not an array, yields an empty
   * table / empty row respectively.
   */
  cleanArray(rows: string[][]): string[][] {
    return (rows ?? []).map((row) =>
      Array.isArray(row)
        ? (row as unknown[]).map((cell) => {
            if (typeof cell === 'string')
              return cell.replaceAll(NON_PRINTABLE_REGEX, '')
            return cell == null ? '' : String(cell)
          })
        : []
    )
  }

  /**
   * Sends one table to the model and returns the corrected rows.
   *
   * @param file         The flow file, used when recording the prompt.
   * @param tableList    The table rows serialized as a JSON string.
   * @param instructions The instructions passed to the model.
   * @param isProduction Forwarded unchanged to `executeAzureTool`.
   * @throws SyntaxError when the response text is not valid JSON.
   * @throws Error when the response has no "output" array.
   */
  private async correctDocupandaIndentation(file: FlowFile, tableList: string, instructions: string, isProduction: boolean) {
    const messages = [
      {
        role: 'user',
        content: tableList,
      },
    ]
    const result: any = await executeAzureTool(
      this,
      messages,
      instructions,
      this.flowService,
      isProduction
    )

    const actual_output = this.extractOutputRows(result)
    this.flowService.log('Output: ', { instructions, result, actual_output })
    await this.flowService.updateStepPrompt(file, this.name, tableList, JSON.stringify(actual_output))
    return actual_output
  }

  /**
   * Extracts the "output" rows from a model response.
   *
   * Accepts either the raw response text or an already-parsed object. For
   * text, a surrounding markdown code fence (as used by the example in the
   * default instructions) is stripped before parsing.
   */
  private extractOutputRows(result: unknown): string[][] {
    let payload: unknown = result
    if (typeof result === 'string') {
      const unfenced = result
        .trim()
        .replace(/^```(?:json)?\s*/i, '')
        .replace(/\s*```$/, '')
      payload = JSON.parse(unfenced)
    }
    const rows = (payload as { output?: unknown } | null | undefined)?.output
    // Without this check a malformed response would silently replace the table with `undefined`.
    if (!Array.isArray(rows))
      throw new Error(`Tool ${this.name}: model response does not contain an "output" array`)
    return rows
  }

  /** Returns the same content disposition ('inline;') regardless of `fileName`. */
  getContentDisposition(fileName: string) {
    return 'inline;'
  }
}
