const express = require('express');
const multer = require('multer');
const axios = require('axios');
const fs = require('fs');
const os = require('os');
const morgan = require('morgan');
const path = require('path');
const { spawn } = require('child_process');
const mammoth = require('mammoth'); // DOCX
const xlsx = require('xlsx');       // XLSX

// Express application instance that all routes below are registered on.
const app = express();
// Log each request using morgan's 'dev' format.
app.use(morgan('dev'));

// Uploaded files are written by multer to an "uploads" folder inside the
// OS temp directory; the route that receives a file is responsible for
// deleting it afterwards.
const upload = multer({ dest: path.join(os.tmpdir(), 'uploads') });

// Base URL of the Ollama HTTP API. Defaults to an Ollama service on the same
// server; set the OLLAMA_URL environment variable to point somewhere else.
// Trailing slashes are removed so `${OLLAMA_URL}/api/...` never contains "//".
const OLLAMA_URL = (process.env.OLLAMA_URL || 'http://127.0.0.1:11434').replace(/\/+$/, '');

// Shape of the JSON object the model is asked to return.
// This is a descriptive template, not a formal JSON Schema: every value is a
// human-readable type hint ("string|null", "YYYY-MM-DD|null", ...). It is
// serialised with JSON.stringify and embedded into BASE_PROMPT below.
const schema = {
  supplier_name: "string|null",
  currency: "PKR|USD|EUR|null",
  issue_date: "YYYY-MM-DD|null",
  // One template entry describing the fields expected for each product line.
  lines: [{
    sku: "string|null",
    product_name: "string",
    uom: "string|null",
    qty: "number|null",
    unit_price: "number|null",
    line_total: "number|null",
    notes: "string|null"
  }]
};

// Instruction text sent to the model on every request. It embeds the
// pretty-printed `schema` object and lists the extraction rules; the
// /extract route appends the file's text content to it when there is any.
const BASE_PROMPT = `You are a strict extraction engine.
Your job is to read invoices, quotes, price lists or product catalogs
and return ONLY valid JSON matching this schema:

${JSON.stringify(schema, null, 2)}

Rules:
- If a field is missing, set null (do not guess).
- currency: normalize to PKR, USD, or EUR.
- Numbers must be plain digits (no commas, currency symbols).
- Combine tables across multiple pages / sheets.
- Ignore non-product lines like totals, tax, signature blocks.
Return a single JSON object, no markdown, no explanation.`;

// ---------- Helpers for different file types ----------

/**
 * Rasterise every page of a PDF into PNG files by running the external
 * `pdftoppm` command (must be installed on the server) at 180 DPI.
 *
 * The PNGs are written to the OS temp directory under a per-call unique
 * prefix; the caller owns the returned files and should delete them.
 *
 * @param {string} pdfPath - Path of the PDF file to convert.
 * @returns {Promise<string[]>} Absolute paths of the generated PNG files,
 *   ordered by page.
 * @throws {Error} (as a rejection) when `pdftoppm` cannot be started, exits
 *   with a non-zero code / is killed, or the output directory cannot be read.
 */
function pdfToPngs(pdfPath) {
  return new Promise((resolve, reject) => {
    // pid + timestamp + random suffix: two conversions started in the same
    // millisecond must not share an output prefix, or their pages would mix.
    const uniqueTag = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
    const outBase = path.join(os.tmpdir(), `pp-${uniqueTag}`);

    const child = spawn('pdftoppm', ['-png', '-r', '180', pdfPath, outBase], {
      // stdin/stdout are unused; stderr is captured for diagnostics.
      stdio: ['ignore', 'ignore', 'pipe'],
    });

    // Keep a bounded amount of stderr so failures are explainable without
    // letting a chatty process grow memory without limit.
    let stderrText = '';
    if (child.stderr) {
      child.stderr.on('data', (chunk) => {
        if (stderrText.length < 2000) {
          stderrText += chunk.toString();
        }
      });
    }

    // Without this listener a missing/unexecutable binary would raise an
    // unhandled 'error' event and take the whole server down.
    child.on('error', (err) => {
      reject(new Error(`pdftoppm could not be started: ${err.message}`));
    });

    // 'close' fires after the process ended AND its stdio streams are
    // finished, so stderrText is complete here.
    child.on('close', (code, signal) => {
      if (code !== 0) {
        const reason = signal ? `${code} (signal ${signal})` : String(code);
        const detail = stderrText.trim();
        reject(new Error(`pdftoppm failed with code ${reason}${detail ? `: ${detail}` : ''}`));
        return;
      }

      const dir = path.dirname(outBase);
      // pdftoppm names its output "<prefix>-<page>.png"; matching on the
      // trailing dash avoids picking up files of a longer, different prefix.
      const pagePrefix = `${path.basename(outBase)}-`;

      fs.promises
        .readdir(dir)
        .then((names) => {
          const pages = names
            .filter((name) => name.startsWith(pagePrefix) && name.endsWith('.png'))
            // Numeric-aware compare keeps page 10 after page 9 even if the
            // page numbers are not zero-padded.
            .sort((a, b) => a.localeCompare(b, undefined, { numeric: true }))
            .map((name) => path.join(dir, name));
          resolve(pages);
        })
        // Route directory-read failures into the promise instead of leaving
        // it pending forever.
        .catch(reject);
    });
  });
}

/**
 * Load a PDF or a single image file and return it as base64 data URLs.
 *
 * - `application/pdf`: every page is rasterised with `pdfToPngs`, encoded as
 *   a PNG data URL, and the temporary page files are removed afterwards.
 * - anything else: the file is read as one image and labelled `png` when the
 *   MIME type contains "png", otherwise `jpeg`. The input file is not deleted.
 *
 * @param {string} filePath - Path of the uploaded file.
 * @param {string} [mimetype] - MIME type reported for the upload.
 * @returns {Promise<string[]>} One `data:image/...;base64,...` string per
 *   page (PDF) or a single-element array (image).
 */
async function fileToBase64Images(filePath, mimetype) {
  if (mimetype !== 'application/pdf') {
    const bytes = await fs.promises.readFile(filePath);
    const subtype = mimetype && mimetype.includes('png') ? 'png' : 'jpeg';
    return [`data:image/${subtype};base64,${bytes.toString('base64')}`];
  }

  const pagePaths = await pdfToPngs(filePath);
  try {
    const pages = [];
    // Read sequentially: keeps page order and avoids holding every page
    // buffer in memory at the same time.
    for (const pagePath of pagePaths) {
      const bytes = await fs.promises.readFile(pagePath);
      pages.push(`data:image/png;base64,${bytes.toString('base64')}`);
    }
    return pages;
  } finally {
    // The rasterised pages are only an intermediate artefact; remove them on
    // success and on failure so the temp directory does not fill up.
    // Cleanup is best-effort: a failed delete must not mask the real result.
    await Promise.all(
      pagePaths.map((pagePath) => fs.promises.unlink(pagePath).catch(() => {}))
    );
  }
}

/**
 * Read a DOCX file and return its raw text content via mammoth.
 *
 * @param {string} filePath - Path of the .docx file.
 * @returns {Promise<string>} Extracted text, or '' when mammoth yields none.
 */
async function extractDocxText(filePath) {
  const { value } = await mammoth.extractRawText({ path: filePath });
  return value || '';
}

/**
 * Flatten a spreadsheet workbook into plain text, sheet by sheet.
 *
 * Each sheet contributes a "Sheet: <name>" heading, then one line per row
 * with cells joined by " | " (null/undefined cells become empty strings),
 * then an empty line. Rows that are blank after trimming are left out.
 *
 * @param {string} filePath - Path of the workbook file.
 * @returns {string} Newline-joined text for all sheets.
 */
function extractXlsxText(filePath) {
  const workbook = xlsx.readFile(filePath);

  // Render one row (array of cell values) as a " | "-separated line.
  const renderRow = (cells) =>
    cells.map((value) => (value == null ? '' : String(value))).join(' | ');

  const output = workbook.SheetNames.flatMap((name) => {
    // header: 1 -> rows come back as arrays of cell values.
    const grid = xlsx.utils.sheet_to_json(workbook.Sheets[name], { header: 1, raw: true });
    const body = grid
      .map((cells) => renderRow(cells))
      .filter((line) => line.trim().length > 0);
    // Trailing '' yields the blank separator line between sheets.
    return [`Sheet: ${name}`, ...body, ''];
  });

  return output.join('\n');
}

/**
 * Decide what to send to Ollama for an uploaded file.
 *
 * - PDF (by MIME type, or by ".pdf" extension when the MIME type is missing
 *   or the generic "application/octet-stream") -> page images.
 * - image/* -> a single image.
 * - .docx -> extracted text; .xlsx / .xlsm -> flattened sheet text.
 * - anything else -> file content decoded as UTF-8 text.
 *
 * @param {{ originalname?: string, mimetype?: string, path: string }} file -
 *   Upload descriptor (the fields read here are the ones multer provides).
 * @returns {Promise<{ images: string[]|null, text: string|null }>} Exactly
 *   one of `images` / `text` is non-null.
 */
async function prepareOllamaInput(file) {
  const ext = path.extname(file.originalname || '').toLowerCase();
  // A missing mimetype must not crash the type checks below, and MIME types
  // are case-insensitive, so normalise once.
  const mime = typeof file.mimetype === 'string' ? file.mimetype.toLowerCase() : '';
  const mimeIsGeneric = mime === '' || mime === 'application/octet-stream';

  // PDFs and images -> use vision (images). Some clients upload PDFs with a
  // generic MIME type, so fall back to the file extension in that case only.
  if (mime === 'application/pdf' || (ext === '.pdf' && mimeIsGeneric)) {
    const images = await fileToBase64Images(file.path, 'application/pdf');
    return { images, text: null };
  }

  if (mime.startsWith('image/')) {
    // Shared helper performs the same read + png/jpeg labelling.
    const images = await fileToBase64Images(file.path, mime);
    return { images, text: null };
  }

  // DOCX
  if (ext === '.docx') {
    const text = await extractDocxText(file.path);
    return { images: null, text };
  }

  // XLSX / XLSM
  if (ext === '.xlsx' || ext === '.xlsm') {
    const text = extractXlsxText(file.path);
    return { images: null, text };
  }

  // Fallback: treat as a UTF-8 text file.
  // NOTE(review): binary formats not handled above (e.g. legacy .xls/.doc)
  // also land here and will decode to noise — consider rejecting them.
  const buf = await fs.promises.readFile(file.path);
  return { images: null, text: buf.toString('utf8') };
}

// ---------- Routes ----------

// Health check: answers every GET /health with the fixed body {"ok": true}.
function respondHealthy(_req, res) {
  res.json({ ok: true });
}

app.get('/health', respondHealthy);

// Remove a leading "data:<mime>;base64," prefix, if present.
// Ollama's `images` field expects bare base64 payloads, not data URLs.
function toRawBase64(image) {
  const marker = ';base64,';
  const at = image.indexOf(marker);
  return image.startsWith('data:') && at !== -1 ? image.slice(at + marker.length) : image;
}

// Parse the outermost {...} span of the model's text as JSON; text without
// any braces is treated as an empty object. Throws on malformed JSON.
function parseModelJson(modelText) {
  const start = modelText.indexOf('{');
  const end = modelText.lastIndexOf('}');
  const jsonStr = start >= 0 && end > start ? modelText.slice(start, end + 1) : '{}';
  return JSON.parse(jsonStr);
}

// Main extraction endpoint.
// Expects a multipart upload with the file in field "file". Responses:
//   200 { ok: true, data }              - JSON object extracted by the model
//   400 { ok: false, error }            - no file was uploaded
//   502 { ok: false, error, raw }       - model output was not parseable JSON
//   500 { ok: false, error }            - any other failure
// The uploaded temp file is always removed when the request finishes.
app.post('/extract', upload.single('file'), async (req, res) => {
  try {
    if (!req.file) {
      return res.status(400).json({
        ok: false,
        error: 'No file uploaded. Field name must be "file".'
      });
    }

    const { images, text } = await prepareOllamaInput(req.file);

    // Build final prompt
    let prompt = BASE_PROMPT;
    if (text) {
      // Avoid sending megabytes of text
      const maxChars = 8000;
      const trimmed = text.length > maxChars ? text.slice(0, maxChars) : text;
      prompt += `

Here is the raw content of the file to analyse:

${trimmed}
`;
    }

    const body = {
      model: 'qwen3-vl:4b',
      prompt,
      // Ask for one complete JSON reply. Without this Ollama streams
      // newline-delimited JSON chunks, which cannot be parsed as one object.
      stream: false,
      options: { temperature: 0.2, num_ctx: 4096 }
    };

    if (images && images.length > 0) {
      body.images = images.map(toRawBase64);
    }

    const r = await axios.post(`${OLLAMA_URL}/api/generate`, body, {
      responseType: 'json',
      timeout: 180000
    });

    // The generated text lives in the `response` field of Ollama's reply
    // envelope. If the body could not be parsed into an object, fall back to
    // treating it as raw text.
    const payload = r.data;
    const textOut = payload && typeof payload === 'object'
      ? String(payload.response || '')
      : String(payload || '');

    let data;
    try {
      data = parseModelJson(textOut);
    } catch (parseErr) {
      return res.status(502).json({
        ok: false,
        error: 'JSON parse error from model',
        raw: textOut
      });
    }

    return res.json({ ok: true, data });
  } catch (err) {
    return res.status(500).json({ ok: false, error: err.message });
  } finally {
    if (req.file) {
      // Best-effort cleanup of the multer temp file; a failed delete must not
      // affect the response that was already chosen above.
      fs.promises.unlink(req.file.path).catch(() => {});
    }
  }
});

// The hosting environment (cPanel / Passenger) provides PORT; 3000 is used
// when it is not set.
const PORT = process.env.PORT || 3000;

// Logs a startup line once the server is accepting connections.
function announceListening() {
  console.log('Invoice extractor listening on port', PORT);
}

app.listen(PORT, announceListening);

// Expose the Express app to whatever loads this module.
module.exports = app;
