// bulk-import-engine.jsx — Bulk Catalog Import pipeline
// ─────────────────────────────────────────────────────────────────
// Universal ingest for catalog assets (works, recordings, releases,
// agreements, profiles, publishers, videos) from CSV/TSV/JSON/
// DDEX-XML/CWR-NWR/Airtable-export sources. (XLSX is not handled by the
// sniffer below and falls through to the CSV path.) Pipeline: file →
// sniff → parse → suggest mappings → validate → dedup against existing
// RS → preview diff → commit (dry-run by default).
//
// This is the back-end that bulk-import.jsx renders.
//
// Exports: window.BULK_IMPORT_ENGINE (window.BULK_IMPORT_ADAPTERS is
// not defined in this file)
// ─────────────────────────────────────────────────────────────────
(function () {
  // SSR / non-browser guard: everything below attaches to `window`,
  // so bail out entirely where no `window` global exists.
  if (typeof window === 'undefined') return;

  // ════════════════════════════════════════════════════════════════
  // ENTITY SCHEMAS — canonical target shapes inside RS
  // ════════════════════════════════════════════════════════════════
  // Each schema declares fields with: k (canonical key), t (type —
  // checked via the shared VALIDATORS table, which returns
  // null|errString), req (required flag), syn (column-name fuzzy hints
  // used for auto-mapping), values (enum options), ex (example).
  // Canonical target shapes inside RS, keyed by entity. `label` is shown
  // to users (runPipeline returns it as entityLabel); `idKey`, `natural`
  // and `pluralStat` are metadata for consumers — they are not read by
  // this file's pipeline functions.
  const SCHEMAS = {
    // Compositions — deduped by ISWC in existingByNatural()/dedupResolve().
    works: {
      label: 'Works (compositions)',
      idKey: 'Work ID', natural: 'ISWC',
      pluralStat: 'works',
      fields: [
        { k: 'Work Title',   t:'string', req:true,  syn:['title','work title','song name','composition','work name','song'], ex:'Somos Los Que Faltan' },
        { k: 'ISWC',         t:'iswc',   req:false, syn:['iswc','iswc code','international standard musical work code'], ex:'T9150768084' },
        { k: 'Work ID',      t:'id',     req:false, syn:['work id','internal id','wid'], ex:'RWOR10001' },
        { k: 'Duration',     t:'duration', req:false, syn:['duration','length','runtime'], ex:'3:24' },
        { k: 'Language',     t:'string', req:false, syn:['language','lang','iso lang'], ex:'Spanish' },
        { k: 'Copyright Date', t:'date', req:false, syn:['copyright date','c-line date','© date'], ex:'2005-01-01' },
        { k: 'CWR Work Type', t:'string',req:false, syn:['cwr work type','genre','category'], ex:'Latin' },
        { k: 'Version Type', t:'string', req:false, syn:['version type','version','original/remix'], ex:'Original Work' },
        { k: 'Grand Rights', t:'yn',     req:false, syn:['grand rights','gr'], ex:'N' },
        { k: 'Recorded Indicator', t:'yn', req:false, syn:['recorded indicator','recorded','has recording'], ex:'Y' },
      ],
    },
    // Masters — deduped by ISRC (Audio).
    recordings: {
      label: 'Recordings (masters)',
      idKey: 'Recording ID', natural: 'ISRC (Audio)',
      pluralStat: 'recordings',
      fields: [
        { k: 'Track Name',   t:'string', req:true,  syn:['track name','recording name','title','master title','track'], ex:'Somos Los Que Faltan (Introducción)' },
        { k: 'Song Name',    t:'string', req:false, syn:['song name','work title','composition','underlying work'], ex:'Somos Los Que Faltan' },
        { k: 'Recording ID', t:'id',     req:false, syn:['recording id','master id','rec id'], ex:'RREC10001' },
        { k: 'ISRC (Audio)', t:'isrc',   req:false, syn:['isrc','isrc audio','isrc (audio)','isrc code'], ex:'USXLH0500001' },
        { k: 'Artist',       t:'string', req:false, syn:['artist','performer','primary artist','main artist','artists'], ex:'Uroyan,Tony Music' },
        { k: 'Label',        t:'string', req:false, syn:['label','imprint','record label'], ex:'Rocket Science LLC' },
        { k: 'Duration',     t:'duration', req:false, syn:['duration','length','runtime'], ex:'3:24' },
        { k: 'Status',       t:'enum',   req:false, syn:['status','state'], values:['Released','Unreleased','In progress','Withdrawn'], ex:'Released' },
        { k: 'Genre',        t:'string', req:false, syn:['genre','primary genre','category'], ex:'Latin Urban' },
        { k: 'Language',     t:'string', req:false, syn:['language','lang','vocal language'], ex:'Spanish' },
        { k: 'Recording Date', t:'date', req:false, syn:['recording date','session date','recorded'], ex:'2005-04-12' },
        { k: 'Controlled',   t:'yn',     req:false, syn:['controlled','controlled status','admin'], ex:'Yes' },
        { k: 'Explicit',     t:'yn',     req:false, syn:['explicit','parental advisory','clean/explicit'], ex:'No' },
      ],
    },
    // Products — deduped by UPC (Release).
    releases: {
      label: 'Releases (albums / EPs / singles)',
      idKey: 'Release ID', natural: 'UPC (Release)',
      pluralStat: 'releases',
      fields: [
        { k: 'Release Name', t:'string', req:true, syn:['release name','album','release title','title','album name'], ex:'En El Mambo' },
        { k: 'Release ID',   t:'id',    req:false, syn:['release id','catalog id','rel id'], ex:'RREL10001' },
        { k: 'UPC (Release)',t:'upc',   req:false, syn:['upc','ean','barcode','upc release'], ex:'888174442860' },
        { k: 'Release Artist', t:'string', req:false, syn:['release artist','album artist','primary artist'], ex:'Uroyan,Tony Music' },
        { k: 'Release Date', t:'date',  req:false, syn:['release date','street date','original release'], ex:'2005-09-12' },
        { k: 'Label',        t:'string',req:false, syn:['label','imprint'], ex:'Rocket Science LLC' },
        { k: 'Release Type', t:'enum',  req:false, syn:['release type','type','format'], values:['Album','EP','Single','Compilation','Mixtape'], ex:'Album' },
        { k: 'P-Line',       t:'string',req:false, syn:['p-line','p line','phonogram copyright'], ex:'℗ 2005 Rocket Science' },
        { k: 'C-Line',       t:'string',req:false, syn:['c-line','c line','copyright line'], ex:'© 2005 Rocket Science' },
      ],
    },
    // People — deduped by IPI name number. Note snake_case keys (unlike
    // the Title-Case keys used for works/recordings/releases).
    profiles: {
      label: 'Profiles (writers · artists · people)',
      idKey: 'profile_id', natural: 'ipi_name_number',
      pluralStat: 'profiles',
      fields: [
        { k: 'first_name',      t:'string', req:true,  syn:['first name','first','given name','firstname'], ex:'Paul' },
        { k: 'last_name',       t:'string', req:true,  syn:['last name','surname','family name','lastname'], ex:'Llanos Rodríguez' },
        { k: 'middle_name',     t:'string', req:false, syn:['middle name','middle initial','middle'], ex:'Angelo' },
        { k: 'artist_name',     t:'string', req:false, syn:['artist name','stage name','performer','professional name','aka','alias'], ex:'Uroyan' },
        { k: 'ipi_name_number', t:'ipi',    req:false, syn:['ipi','ipi name number','ipi name','ipi #','cae'], ex:'00583374915' },
        { k: 'pro_affiliation', t:'enum',   req:false, syn:['pro','pro affiliation','society','rights org','collection society'], values:['ASCAP','BMI','SESAC','GMR','SACEM','PRS','GEMA','SIAE','SOCAN','APRA','JASRAC','STIM','KOMCA','None'], ex:'ASCAP' },
        { k: 'profile_type',    t:'enum',   req:false, syn:['type','profile type','entity type','kind'], values:['Person','Group','Band','Other'], ex:'Person' },
        { k: 'alias',           t:'string', req:false, syn:['alias','also known as','aka'], ex:'' },
      ],
    },
    // Companies — deduped by IPI, with a lowercase-name fallback key.
    publishers: {
      label: 'Publishers (incl. sub-publishers)',
      idKey: 'publisher_id', natural: 'ipi',
      pluralStat: 'publishers',
      fields: [
        { k: 'name',         t:'string', req:true,  syn:['name','publisher','publisher name','company','imprint name'], ex:'Pluralis Music' },
        { k: 'ipi',          t:'ipi',    req:false, syn:['ipi','ipi number','ipi #','cae'], ex:'00578913241' },
        { k: 'role',         t:'enum',   req:false, syn:['role','publisher role','rel'], values:['Original Publisher','Administrator','Sub-Publisher','Co-Publisher','Income Participant','Acquirer'], ex:'Original Publisher' },
        { k: 'territory',    t:'string', req:false, syn:['territory','tis','country','region','scope'], ex:'World' },
        { k: 'parent',       t:'string', req:false, syn:['parent','parent company','parent publisher'], ex:'' },
        { k: 'pro',          t:'enum',   req:false, syn:['pro','pro affiliation','society'], values:['ASCAP','BMI','SESAC','GMR','SACEM','PRS','GEMA','SIAE','SOCAN','APRA','JASRAC','STIM','KOMCA'], ex:'ASCAP' },
      ],
    },
    // Contracts — no natural-key dedup implemented in existingByNatural().
    agreements: {
      label: 'Agreements (contracts)',
      idKey: 'agreement_id', natural: 'agreement_number',
      pluralStat: 'agreements',
      fields: [
        { k: 'agreement_number', t:'string', req:true,  syn:['agreement number','agr no','agreement id','contract no','agr #'], ex:'AGR-2024-0014' },
        { k: 'agreement_type',   t:'enum',   req:true,  syn:['agreement type','type','deal type'], values:['Original','Sub-Publishing','Administration','Co-Publishing','Songwriter','Co-Writing'], ex:'Original' },
        { k: 'assignor',         t:'string', req:true,  syn:['assignor','from','licensor','grantor','writer'], ex:'Paul Llanos Rodríguez' },
        { k: 'acquirer',         t:'string', req:true,  syn:['acquirer','to','licensee','grantee','publisher'], ex:'Pluralis Music' },
        { k: 'effective_date',   t:'date',   req:false, syn:['effective date','start date','agreement start','from'], ex:'2024-01-01' },
        { k: 'expiry_date',      t:'date',   req:false, syn:['expiry date','end date','agreement end','to','through'], ex:'2027-01-01' },
        { k: 'territory',        t:'string', req:false, syn:['territory','tis','scope','region'], ex:'World' },
        { k: 'pr_share',         t:'pct',    req:false, syn:['pr share','perf share','performance share','public performance %'], ex:'50' },
        { k: 'mr_share',         t:'pct',    req:false, syn:['mr share','mech share','mechanical share','mech %'], ex:'50' },
        { k: 'sr_share',         t:'pct',    req:false, syn:['sr share','sync share','synchro share','sync %'], ex:'50' },
      ],
    },
    // Music videos — no natural-key dedup implemented in existingByNatural().
    videos: {
      label: 'Music videos',
      idKey: 'video_id', natural: 'isrc_video',
      pluralStat: 'videos',
      fields: [
        { k: 'video_title',  t:'string', req:true,  syn:['video title','title','music video','clip name'], ex:'Somos Los Que Faltan (Official)' },
        { k: 'isrc_video',   t:'isrc',   req:false, syn:['isrc','isrc video','isrc (video)','vid isrc'], ex:'USXLH2500001' },
        { k: 'recording_id', t:'id',     req:false, syn:['recording id','linked recording','master id','underlying recording'], ex:'RREC10001' },
        { k: 'youtube_url',  t:'url',    req:false, syn:['youtube','youtube url','yt link','vevo url'], ex:'https://youtu.be/abc' },
        { k: 'release_date', t:'date',   req:false, syn:['release date','premiere','published','street date'], ex:'2005-09-12' },
        { k: 'director',     t:'string', req:false, syn:['director','directed by'], ex:'' },
        { k: 'duration',     t:'duration', req:false, syn:['duration','length','runtime'], ex:'4:08' },
      ],
    },
    // Royalty statement lines — the only schema with monetary fields.
    statements: {
      label: 'Royalty statements (lines)',
      idKey: 'lineId', natural: 'lineId',
      pluralStat: 'statement lines',
      fields: [
        { k: 'sourceId',     t:'string', req:true,  syn:['source','source id','vendor','statement source'], ex:'src_ascap_us' },
        { k: 'periodStart',  t:'date',   req:true,  syn:['period start','from','quarter start','period from'], ex:'2025-01-01' },
        { k: 'periodEnd',    t:'date',   req:true,  syn:['period end','to','quarter end','period to'], ex:'2025-03-31' },
        { k: 'workTitle',    t:'string', req:false, syn:['work title','title','song title','composition'], ex:'Somos Los Que Faltan' },
        { k: 'iswc',         t:'iswc',   req:false, syn:['iswc'], ex:'T9150768084' },
        { k: 'isrc',         t:'isrc',   req:false, syn:['isrc'], ex:'USXLH0500001' },
        { k: 'territory',    t:'iso2',   req:false, syn:['territory','country','iso country'], ex:'US' },
        { k: 'units',        t:'number', req:false, syn:['units','plays','streams','count','quantity'], ex:'12450' },
        { k: 'grossAmount',  t:'number', req:true,  syn:['gross amount','amount','royalty','gross','revenue','earnings'], ex:'342.18' },
        { k: 'currency',     t:'iso4217',req:true,  syn:['currency','ccy','iso currency'], ex:'USD' },
        { k: 'rightsType',   t:'enum',   req:false, syn:['rights type','type','right kind'], values:['PR','MR','SR','NR'], ex:'PR' },
      ],
    },
  };

  // ════════════════════════════════════════════════════════════════
  // FORMAT SNIFFING
  // ════════════════════════════════════════════════════════════════
  // CSV detection: try comma / tab / semicolon / pipe — pick highest-row-consistency.
  // Guess the column delimiter by probing the first few non-empty lines with
  // each candidate and keeping the one that yields the widest, most
  // consistent column count. Falls back to ',' when nothing splits.
  function sniffDelimiter(text) {
    const probe = text.split(/\r?\n/).slice(0, 8).filter(Boolean);
    if (probe.length === 0) return ',';
    let winner = ',';
    let topScore = -Infinity;
    [',', '\t', ';', '|'].forEach((delim) => {
      const widths = probe.map((line) => splitCSV(line, delim).length);
      const widest = Math.max(...widths);
      if (widest < 2) return; // this delimiter never splits — not a candidate
      const consistency = widths.filter((w) => w === widest).length / widths.length;
      const score = widest * consistency;
      if (score > topScore) {
        topScore = score;
        winner = delim;
      }
    });
    return winner;
  }

  // RFC-4180-ish field splitter for a single record: delimiters inside
  // double quotes are literal, and a doubled quote inside a quoted field
  // decodes to one quote character. Enclosing quotes are not emitted.
  function splitCSV(line, delim) {
    const fields = [];
    let field = '';
    let quoted = false;
    let i = 0;
    while (i < line.length) {
      const ch = line[i];
      if (ch === '"') {
        if (quoted && line[i + 1] === '"') {
          field += '"'; // escaped quote
          i += 1;
        } else {
          quoted = !quoted;
        }
      } else if (ch === delim && !quoted) {
        fields.push(field);
        field = '';
      } else {
        field += ch;
      }
      i += 1;
    }
    fields.push(field);
    return fields;
  }

  // Decide the input format from the file extension first, then from the
  // content. Returns one of 'json' | 'xml' | 'tsv' | 'cwr' | 'csv'.
  // BUGFIX: the JSON content sniff used to call text.startsWith() on the raw
  // text, so a leading BOM/whitespace defeated it (the XML sniff already
  // trimmed). Both sniffs now share one BOM-stripped, left-trimmed view.
  function sniffFormat(name, text) {
    const ext = (name.match(/\.([a-z0-9]+)$/i) || [, ''])[1].toLowerCase();
    const lead = text.replace(/^\uFEFF/, '').trimStart();
    if (ext === 'json') return 'json';
    if (ext === 'xml' || /^<\?xml|^<DDEX|^<NewReleaseMessage/i.test(lead)) return 'xml';
    if (ext === 'tsv') return 'tsv';
    // CWR transmissions start with an HDR record and contain a GRH group header.
    if (/^HDR/.test(text) && /^GRH/m.test(text)) return 'cwr';
    if (lead.startsWith('{') || lead.startsWith('[')) return 'json';
    return 'csv'; // default — includes unknown extensions (e.g. xlsx is NOT parsed)
  }

  // ════════════════════════════════════════════════════════════════
  // PARSING — returns { headers, rows } regardless of input format
  // ════════════════════════════════════════════════════════════════
  // Parse delimited text into { headers, rows, delim }. Record splitting is
  // quote-aware (newlines inside quoted fields stay in the field); the raw
  // quote characters are kept in each record so splitCSV can decode them.
  function parseCSV(text, delim) {
    const d = delim || sniffDelimiter(text);
    const records = [];
    let rec = '';
    let quoted = false;
    for (let i = 0; i < text.length; i++) {
      const ch = text[i];
      if (ch === '"') {
        if (quoted && text[i + 1] === '"') {
          rec += '""'; // keep the escaped pair verbatim for splitCSV
          i++;
        } else {
          quoted = !quoted;
          rec += ch;
        }
      } else if ((ch === '\n' || ch === '\r') && !quoted) {
        if (rec) records.push(rec); // empty lines are dropped
        rec = '';
        if (ch === '\r' && text[i + 1] === '\n') i++; // CRLF counts once
      } else {
        rec += ch;
      }
    }
    if (rec) records.push(rec);
    if (records.length === 0) return { headers: [], rows: [], delim: d };
    const unquote = (s) => s.trim().replace(/^"|"$/g, '');
    const headers = splitCSV(records[0], d).map(unquote);
    const rows = records.slice(1).map((record) => {
      const cells = splitCSV(record, d).map(unquote);
      const row = {};
      headers.forEach((h, idx) => {
        row[h] = cells[idx] != null ? cells[idx] : '';
      });
      return row;
    });
    return { headers, rows, delim: d };
  }

  // Parse JSON into { headers, rows }. Accepts a top-level array, or an
  // object wrapping one under records/data/items (or its first
  // array-of-objects property). Headers are the union of row keys.
  // BUGFIX: valid scalar JSON (e.g. `null`) used to throw on `j.records`
  // and be mislabeled 'invalid-json'; non-object rows (e.g. strings) used
  // to leak their index keys into headers via Object.keys.
  function parseJSON(text) {
    try {
      const j = JSON.parse(text);
      let arr;
      if (Array.isArray(j)) {
        arr = j;
      } else if (j !== null && typeof j === 'object') {
        arr = Array.isArray(j.records) ? j.records :
              Array.isArray(j.data) ? j.data :
              Array.isArray(j.items) ? j.items :
              // Fall back to the first array-of-objects property
              Object.values(j).find(v => Array.isArray(v) && v[0] && typeof v[0] === 'object') ||
              [];
      } else {
        arr = []; // valid but scalar JSON — nothing tabular to import
      }
      if (arr.length === 0) return { headers: [], rows: [] };
      const headerSet = new Set();
      arr.forEach(o => {
        if (o && typeof o === 'object') Object.keys(o).forEach(k => headerSet.add(k));
      });
      return { headers: [...headerSet], rows: arr };
    } catch (e) {
      return { headers: [], rows: [], error: 'invalid-json' };
    }
  }

  // Naive DDEX/CWR parsers — extract row-shaped data from XML/CWR text
  // Naive DDEX-ish XML reader: pick the known repeating element that occurs
  // most often, treat each occurrence as a row, and collect its leaf
  // elements (<Tag>value</Tag>) as fields, namespace prefixes stripped.
  function parseXML(text) {
    const candidates = ['SoundRecording','MusicalWork','Release','ReleaseGroup','Party','Artist','Track','Resource'];
    let blockTag = null;
    let blockHits = 0;
    for (const tag of candidates) {
      const hits = (text.match(new RegExp(`<${tag}[\\s>]`, 'g')) || []).length;
      if (hits > blockHits) {
        blockHits = hits;
        blockTag = tag;
      }
    }
    if (!blockTag) return { headers: [], rows: [], error: 'no-blocks' };
    const headerSet = new Set();
    const rows = [];
    const blockRe = new RegExp(`<${blockTag}[^>]*>([\\s\\S]*?)</${blockTag}>`, 'g');
    for (const block of text.matchAll(blockRe)) {
      const record = {};
      const leafRe = /<([A-Za-z][\w:-]*)\s*(?:[^>]*)?>([^<]+)<\/\1>/g;
      for (const leaf of block[1].matchAll(leafRe)) {
        const key = leaf[1].replace(/^.*:/, ''); // drop namespace prefix
        if (!record[key]) record[key] = leaf[2].trim(); // first occurrence wins
        headerSet.add(key);
      }
      rows.push(record);
    }
    return { headers: [...headerSet], rows, blockTag };
  }

  // Quick CWR reader: keep only work-transaction records (NWR = new work,
  // REV = revised work, ISW = ISWC notification) and lift a few common
  // fixed-width fields (v2.x offsets, per the original implementation)
  // into row objects shaped like the `works` schema.
  // FIX: replaced deprecated Annex-B String.prototype.substr(start, length)
  // with the standard slice(start, end) — offsets are unchanged.
  function parseCWR(text) {
    const rows = [];
    for (const ln of text.split(/\r?\n/)) {
      if (!/^(NWR|REV|ISW)/.test(ln)) continue;
      rows.push({
        'Work Title': ln.slice(19, 79).trim(),           // 60-char title
        'ISWC': ln.slice(79, 90).trim(),                 // 11-char ISWC
        'Language': ln.slice(90, 92).trim(),             // 2-char language code
        'Duration': ln.slice(106, 112).trim(),           // HHMMSS
        'Recorded Indicator': ln.slice(112, 113).trim(), // Y/N flag
      });
    }
    return { headers: ['Work Title','ISWC','Language','Duration','Recorded Indicator'], rows };
  }

  // Front door: sniff the format, dispatch to the matching parser, and tag
  // the result with the detected format. Every branch yields { headers, rows, ... }.
  function parse(name, text) {
    const format = sniffFormat(name, text);
    switch (format) {
      case 'json': return { ...parseJSON(text), format };
      case 'xml':  return { ...parseXML(text), format };
      case 'cwr':  return { ...parseCWR(text), format };
      case 'tsv':  return { ...parseCSV(text, '\t'), format };
      default:     return { ...parseCSV(text, null), format };
    }
  }

  // ════════════════════════════════════════════════════════════════
  // ENTITY DETECTION — given headers, score each schema for fit
  // ════════════════════════════════════════════════════════════════
  // Similarity of two column names in [0, 1]: exact match (after stripping
  // whitespace/underscores/dashes/parens and lowercasing) → 1, substring
  // containment → 0.85, otherwise the Dice coefficient over character bigrams.
  // BUGFIX: a name that normalizes to '' (e.g. '---') used to score 0.85
  // against EVERYTHING, because ''.includes/x.includes('') is always true —
  // that polluted auto-mapping suggestions. Empty names now score 0.
  function fuzzy(a, b) {
    const norm = (s) => s.toLowerCase().replace(/[\s_\-()]+/g, '');
    a = norm(a); b = norm(b);
    if (!a || !b) return 0;
    if (a === b) return 1;
    if (a.includes(b) || b.includes(a)) return 0.85;
    // Dice coefficient over bigrams
    const bigrams = (s) => {
      const set = new Set();
      for (let i = 0; i < s.length - 1; i++) set.add(s.slice(i, i + 2));
      return set;
    };
    const A = bigrams(a), B = bigrams(b);
    if (A.size === 0 || B.size === 0) return 0; // single-character names
    let overlap = 0;
    A.forEach((bg) => { if (B.has(bg)) overlap++; });
    return (overlap * 2) / (A.size + B.size);
  }

  // Score every schema against the incoming headers and pick the best fit.
  // Required-field matches weigh double; a schema missing any required field
  // has its score halved. Returns { picked, scores } (falls back to 'works').
  // FIX: bestHeaderForField() was invoked twice per field (once to score,
  // once more to recount required hits) — each field is now scored once.
  function detectEntity(headers) {
    const scores = {};
    for (const [eKey, schema] of Object.entries(SCHEMAS)) {
      let score = 0;
      let hits = 0;
      let reqCount = 0;
      let reqHits = 0;
      for (const f of schema.fields) {
        const best = bestHeaderForField(headers, f); // scored exactly once
        const matched = best.score > 0.55;
        if (matched) {
          score += best.score * (f.req ? 2 : 1);
          hits++;
        }
        if (f.req) {
          reqCount++;
          if (matched) reqHits++;
        }
      }
      // Penalize schemas whose required fields aren't all covered
      if (reqHits < reqCount) score *= 0.5;
      scores[eKey] = { score, hits };
    }
    const sorted = Object.entries(scores).sort((a, b) => b[1].score - a[1].score);
    return { picked: sorted[0]?.[0] || 'works', scores };
  }

  // For one schema field, find the incoming header with the highest fuzzy
  // score against the canonical key or any synonym. Returns
  // { header, score }; header stays null when nothing scores above 0.
  function bestHeaderForField(headers, field) {
    let top = { header: null, score: 0 };
    for (const header of headers) {
      const names = [field.k, ...(field.syn || [])];
      const score = names.reduce((acc, n) => Math.max(acc, fuzzy(header, n)), 0);
      if (score > top.score) top = { header, score };
    }
    return top;
  }

  // Propose a field→header mapping for the given entity. Every (field,
  // header) pair is scored, then assigned greedily best-first so each header
  // feeds at most one field and vice versa. Pairs under 0.5 are ignored.
  function suggestMapping(headers, entity) {
    const schema = SCHEMAS[entity];
    if (!schema) return {};
    const pairs = [];
    schema.fields.forEach((field) => {
      headers.forEach((header) => {
        const names = [field.k, ...(field.syn || [])];
        const score = names.reduce((m, n) => Math.max(m, fuzzy(header, n)), 0);
        pairs.push({ field: field.k, header, score });
      });
    });
    pairs.sort((x, y) => y.score - x.score); // stable: ties keep schema order
    const mapping = {};
    const taken = new Set();
    for (const p of pairs) {
      if (p.score < 0.5) continue;
      if (mapping[p.field] || taken.has(p.header)) continue;
      mapping[p.field] = p.header;
      taken.add(p.header);
    }
    return mapping;
  }

  // ════════════════════════════════════════════════════════════════
  // VALIDATION — per-row + per-batch
  // ════════════════════════════════════════════════════════════════
  // Type validators keyed by a field's `t`. Each takes the raw cell value
  // (and, for `enum`, the field descriptor) and returns null when valid or
  // an error-kind string. Empty/falsy values pass every check — required-
  // ness is enforced separately in validateRow().
  const VALIDATORS = {
    // Free text; only guards against absurd lengths.
    string: (v) => v == null ? null : (String(v).length > 1024 ? 'too-long' : null),
    // ISWC: strip spaces/dots/dashes, force a leading 'T', then pattern-check.
    iswc:   (v) => !v ? null : (/^T-?\d{3}\.?\d{3}\.?\d{3}-?\d$/.test(String(v).replace(/[\s.\-]/g,'').replace(/^T?/,'T')) ? null : 'invalid-iswc'),
    // ISRC: 2-letter country + 3-char registrant + 2-digit year + 5-digit designation.
    isrc:   (v) => !v ? null : (/^[A-Z]{2}-?[A-Z0-9]{3}-?\d{2}-?\d{5}$/.test(String(v).replace(/[\s\-]/g,'')) ? null : 'invalid-isrc'),
    // UPC/EAN barcode: 12–14 digits after stripping separators.
    upc:    (v) => !v ? null : (/^\d{12,14}$/.test(String(v).replace(/[\s\-]/g,'')) ? null : 'invalid-upc'),
    // IPI name number: 9–11 digits.
    ipi:    (v) => !v ? null : (/^\d{9,11}$/.test(String(v).replace(/[\s\-]/g,'')) ? null : 'invalid-ipi'),
    // ISO 3166-1 alpha-2 territory code (case-insensitive).
    iso2:   (v) => !v ? null : (/^[A-Z]{2}$/.test(String(v).toUpperCase()) ? null : 'invalid-territory'),
    // ISO 4217 currency code (case-insensitive).
    iso4217:(v) => !v ? null : (/^[A-Z]{3}$/.test(String(v).toUpperCase()) ? null : 'invalid-currency'),
    // Boolean-ish flags; 'invalid-yn' is downgraded to a warning in validateRow().
    yn:     (v) => !v ? null : (/^(y|n|yes|no|true|false|1|0)$/i.test(String(v)) ? null : 'invalid-yn'),
    // Numeric after dropping thousands separators, '$' and spaces.
    number: (v) => !v ? null : (isNaN(Number(String(v).replace(/[,$ ]/g,''))) ? 'invalid-number' : null),
    // Percentage 0–100 after dropping '%' and ','.
    pct:    (v) => !v ? null : (() => { const n = Number(String(v).replace(/[%,]/g,'')); return (isNaN(n) || n < 0 || n > 100) ? 'invalid-pct' : null; })(),
    // NOTE(review): Date.parse is engine-dependent for non-ISO strings —
    // ambiguous formats (e.g. DD/MM/YYYY) may pass or fail inconsistently.
    date:   (v) => !v ? null : (isNaN(Date.parse(v)) ? 'invalid-date' : null),
    // Either M:SS(:SS) or a bare number of seconds.
    duration:(v) => !v ? null : (/^\d+:\d{2}(:\d{2})?$/.test(v) || /^\d+$/.test(v) ? null : 'invalid-duration'),
    // Case-insensitive membership in the field's declared `values`.
    enum:   (v, f) => !v ? null : (f.values && !f.values.some(x => x.toLowerCase() === String(v).toLowerCase()) ? 'invalid-enum' : null),
    // Only http(s) URLs are accepted.
    url:    (v) => !v ? null : (/^https?:\/\//.test(v) ? null : 'invalid-url'),
    // Internal IDs are free-form but length-capped.
    id:     (v) => !v ? null : (String(v).length > 100 ? 'id-too-long' : null),
  };

  // Validate one mapped row against the entity schema. Missing required
  // values and hard type failures land in `errors`; soft failures
  // (enum / yes-no mismatches) land in `warnings` since the importer can
  // still coerce or ignore them. Unknown entities validate vacuously.
  function validateRow(row, mapping, entity) {
    const schema = SCHEMAS[entity];
    if (!schema) return { errors: [], warnings: [] };
    const errors = [];
    const warnings = [];
    for (const field of schema.fields) {
      const header = mapping[field.k];
      const raw = header ? row[header] : '';
      if (field.req && (!raw || String(raw).trim() === '')) {
        errors.push({ field: field.k, kind: 'missing-required', value: '' });
        continue;
      }
      const check = VALIDATORS[field.t];
      if (!check || !raw) continue;
      const fault = check(raw, field);
      if (!fault) continue;
      const soft = fault === 'invalid-enum' || fault.startsWith('invalid-yn');
      (soft ? warnings : errors).push({
        field: field.k,
        kind: fault,
        value: String(raw).slice(0, 60), // cap echoed value for the UI
      });
    }
    return { errors, warnings };
  }

  // ════════════════════════════════════════════════════════════════
  // DEDUP — match rows against existing RS by natural key first,
  // then fuzzy title-match. Returns 'create' | 'update' | 'duplicate'.
  // ════════════════════════════════════════════════════════════════
  // Index the existing RS records for an entity by natural key:
  // works → ISWC, recordings → ISRC, releases → UPC (all with dots/dashes
  // stripped), profiles → IPI name number, publishers → IPI plus a
  // 'NAME:'-prefixed lowercase-name fallback. Records without a key are
  // skipped; unsupported entities yield an empty Map.
  function existingByNatural(entity) {
    const RS = window.RS || {};
    const map = new Map();
    const list = RS[entity];
    if (!list) return map;
    const strip = (s) => (s || '').replace(/[.\-]/g, '');
    switch (entity) {
      case 'works':
        list.forEach((w) => {
          const k = strip(w.ISWC);
          if (k) map.set(k, w);
        });
        break;
      case 'recordings':
        list.forEach((r) => {
          const k = strip(r['ISRC (Audio)']);
          if (k) map.set(k, r);
        });
        break;
      case 'releases':
        list.forEach((r) => {
          const k = strip(r['UPC (Release)']);
          if (k) map.set(k, r);
        });
        break;
      case 'profiles':
        list.forEach((p) => {
          if (p.ipi_name_number) map.set(String(p.ipi_name_number), p);
        });
        break;
      case 'publishers':
        list.forEach((p) => {
          if (p.ipi) map.set(String(p.ipi), p);
          if (p.name) map.set('NAME:' + p.name.toLowerCase(), p);
        });
        break;
    }
    return map;
  }

  // Decide what a row means against the existing catalog:
  //   'update'          — natural key matched an existing record
  //   'duplicate-title' — no key, but an exact (case/space-insensitive)
  //                       title match exists (works/recordings/releases)
  //   'create'          — otherwise
  function dedupResolve(row, mapping, entity) {
    const index = existingByNatural(entity);
    const raw = (fieldKey) => row[mapping[fieldKey]];
    const normalized = (fieldKey) => (raw(fieldKey) || '').replace(/[.\-]/g, '');
    let key = '';
    switch (entity) {
      case 'works':      key = normalized('ISWC'); break;
      case 'recordings': key = normalized('ISRC (Audio)'); break;
      case 'releases':   key = normalized('UPC (Release)'); break;
      case 'profiles':   key = String(raw('ipi_name_number') || ''); break;
      case 'publishers': {
        const ipi = String(raw('ipi') || '');
        key = ipi || ('NAME:' + String(raw('name') || '').toLowerCase());
        break;
      }
    }
    if (key && index.has(key)) return { action: 'update', existing: index.get(key), key };
    // No natural-key hit — fall back to exact title matching where titles exist
    if (['works', 'recordings', 'releases'].includes(entity)) {
      const RS = window.RS || {};
      const titleFields = { works: 'Work Title', recordings: 'Track Name', releases: 'Release Name' };
      const titleField = titleFields[entity];
      const incoming = (raw(titleField) || '').toLowerCase().trim();
      if (incoming && incoming.length > 3) { // very short titles are too collision-prone
        const match = (RS[entity] || []).find(
          (item) => (item[titleField] || '').toLowerCase().trim() === incoming
        );
        if (match) return { action: 'duplicate-title', existing: match, key: incoming };
      }
    }
    return { action: 'create', existing: null, key };
  }

  // ════════════════════════════════════════════════════════════════
  // FULL PIPELINE
  // ════════════════════════════════════════════════════════════════
  // End-to-end dry run over one uploaded file:
  //   parse → detect entity → suggest mapping → validate + dedup each row →
  //   summarize. Never mutates RS — applying the result is commit()'s job.
  // `entityOverride` / `mappingOverride` let the UI pin user choices;
  // `simulate` caps processing at 50k rows to bound preview latency.
  // Returns { ok, ms, format, entity, ..., stats, sample, findings }.
  // BUGFIX: stats.rowsWithIssues was findings.length (total findings, so a
  // row with 3 problems counted 3×); it now counts distinct rows.
  function runPipeline({ name, text, entityOverride, mappingOverride, simulate }) {
    const t0 = performance.now();
    const parsed = parse(name, text);
    if (parsed.error || !parsed.rows || parsed.rows.length === 0) {
      return {
        ok: false, ms: performance.now() - t0,
        format: parsed.format, error: parsed.error || 'no-rows',
        headers: parsed.headers || [], rows: [], stats: {},
      };
    }
    const detected = detectEntity(parsed.headers);
    const entity = entityOverride || detected.picked;
    const mapping = mappingOverride || suggestMapping(parsed.headers, entity);
    const schema = SCHEMAS[entity];
    const rows = parsed.rows.slice(0, simulate ? Math.min(parsed.rows.length, 50_000) : parsed.rows.length);

    let createCount = 0, updateCount = 0, dupCount = 0;
    let errCount = 0, warnCount = 0;
    const findings = [];
    const issueRows = new Set(); // distinct row indices carrying ≥1 finding
    const sample = [];           // first 250 rows enriched for preview

    rows.forEach((row, i) => {
      const v = validateRow(row, mapping, entity);
      const d = dedupResolve(row, mapping, entity);
      if (v.errors.length > 0) errCount++;
      if (v.warnings.length > 0) warnCount++;
      if (d.action === 'create') createCount++;
      else if (d.action === 'update') updateCount++;
      else dupCount++;

      v.errors.forEach(e => { findings.push({ row: i, sev: 'error', ...e }); issueRows.add(i); });
      v.warnings.forEach(w => { findings.push({ row: i, sev: 'warn', ...w }); issueRows.add(i); });

      if (i < 250) {
        sample.push({
          rowIndex: i,
          row,
          dedup: d,
          errors: v.errors,
          warnings: v.warnings,
        });
      }
    });

    const t1 = performance.now();
    return {
      ok: true,
      ms: t1 - t0,
      format: parsed.format,
      entity,
      entityLabel: schema.label,
      detected,
      headers: parsed.headers,
      mapping,
      rowCount: rows.length,
      stats: {
        create: createCount,
        update: updateCount,
        duplicate: dupCount,
        errors: errCount,     // rows with ≥1 error
        warnings: warnCount,  // rows with ≥1 warning
        rowsWithIssues: issueRows.size,
      },
      sample,
      findings: findings.slice(0, 2000), // cap payload handed to the UI
    };
  }

  // ════════════════════════════════════════════════════════════════
  // COMMIT — applies the run to RS in-memory (session-only)
  // ════════════════════════════════════════════════════════════════
  // Apply a successful pipeline run to the in-memory catalog (session-only).
  // Rows with validation errors are skipped; 'create' rows become new
  // records keyed by canonical field names; 'update' rows patch the matched
  // existing record in place (empty incoming values never clobber data);
  // everything else (duplicates, updates without a match) is skipped.
  // NOTE(review): only result.sample (the first ≤250 preview rows) is
  // applied — committing a full run would require retaining all rows.
  // BUGFIX: `window.RS || {}` used to write into a throwaway object when
  // window.RS was uninitialized; the store is now anchored on window.
  function commit(result) {
    if (!result || !result.ok) return { ok: false, applied: 0 };
    const RS = window.RS || (window.RS = {});
    const target = result.entity;
    const list = RS[target] || (RS[target] = []);
    let created = 0, updated = 0, skipped = 0;
    result.sample.forEach((s) => {
      if (s.errors.length > 0) { skipped++; return; }
      if (s.dedup.action === 'create') {
        const newRow = {};
        Object.entries(result.mapping).forEach(([fieldKey, header]) => {
          newRow[fieldKey] = s.row[header] || '';
        });
        list.push(newRow);
        created++;
      } else if (s.dedup.action === 'update' && s.dedup.existing) {
        Object.entries(result.mapping).forEach(([fieldKey, header]) => {
          if (s.row[header]) s.dedup.existing[fieldKey] = s.row[header];
        });
        updated++;
      } else {
        skipped++;
      }
    });
    return { ok: true, created, updated, skipped, applied: created + updated };
  }

  // ════════════════════════════════════════════════════════════════
  // SEED RUN HISTORY (deterministic)
  // ════════════════════════════════════════════════════════════════
  // Seed a deterministic import-run history, created once per session and
  // cached on window so repeated callers share the same array instance.
  function seedRunHistory() {
    if (window.__BULK_IMPORT_RUNS) return window.__BULK_IMPORT_RUNS;
    const DAY = 86400_000; // ms per day — startedAt values count back from now
    const now = Date.now();
    const runs = [
      { id:'bi_010', fileName:'rocket_science_works_2024_export.csv', entity:'works', rowCount:553, createCount:0, updateCount:553, dupCount:0, errCount:8, warnCount:124, status:'partial', startedAt: now - DAY*1, ms: 4180, user:'Avery Cohen', format:'csv', source:'Airtable export' },
      { id:'bi_009', fileName:'pluralis_writers_legacy.csv',           entity:'profiles', rowCount:419, createCount:312, updateCount:91, dupCount:16, errCount:0, warnCount:42, status:'ok', startedAt: now - DAY*3, ms: 2890, user:'Avery Cohen', format:'csv', source:'Legacy DB dump' },
      { id:'bi_008', fileName:'half_tone_recordings_master.tsv',       entity:'recordings', rowCount:1035, createCount:48, updateCount:962, dupCount:25, errCount:14, warnCount:201, status:'partial', startedAt: now - DAY*5, ms: 6700, user:'Mira Tan', format:'tsv', source:'Half-Tone Records' },
      { id:'bi_007', fileName:'q3_releases_billboard_chart_2024.json', entity:'releases', rowCount:391, createCount:88, updateCount:288, dupCount:15, errCount:0, warnCount:31, status:'ok', startedAt: now - DAY*7, ms: 3120, user:'Avery Cohen', format:'json', source:'Internal API' },
      { id:'bi_006', fileName:'sub_publisher_deals_2024.csv',          entity:'agreements', rowCount:14, createCount:14, updateCount:0, dupCount:0, errCount:0, warnCount:2, status:'ok', startedAt: now - DAY*10, ms: 480, user:'L. Kaur', format:'csv', source:'Manual entry' },
      { id:'bi_005', fileName:'distrokid_isrcs_dec2024.csv',           entity:'recordings', rowCount:208, createCount:208, updateCount:0, dupCount:0, errCount:0, warnCount:8, status:'ok', startedAt: now - DAY*14, ms: 1180, user:'Avery Cohen', format:'csv', source:'DistroKid' },
      { id:'bi_004', fileName:'unknown_format_2024.txt',               entity:null, rowCount:0, createCount:0, updateCount:0, dupCount:0, errCount:1, warnCount:0, status:'failed', startedAt: now - DAY*16, ms: 80, user:'Avery Cohen', format:'csv', source:'(failed)', failureReason:'Unable to detect entity type — sniff confidence below 0.4' },
      { id:'bi_003', fileName:'ddex_release_43_batch.xml',             entity:'releases', rowCount:48, createCount:36, updateCount:11, dupCount:1, errCount:0, warnCount:6, status:'ok', startedAt: now - DAY*20, ms: 2200, user:'Mira Tan', format:'xml', source:'DDEX 4.3' },
      { id:'bi_002', fileName:'cwr_NWR_export_q2.v21',                 entity:'works', rowCount:267, createCount:13, updateCount:254, dupCount:0, errCount:0, warnCount:18, status:'ok', startedAt: now - DAY*25, ms: 1640, user:'Avery Cohen', format:'cwr', source:'CWR v2.1' },
      { id:'bi_001', fileName:'youtube_channel_videos_export.json',    entity:'videos', rowCount:88, createCount:62, updateCount:23, dupCount:3, errCount:0, warnCount:5, status:'ok', startedAt: now - DAY*30, ms: 920, user:'Avery Cohen', format:'json', source:'YouTube Data API' },
    ];
    window.__BULK_IMPORT_RUNS = runs;
    return runs;
  }

  // ════════════════════════════════════════════════════════════════
  // EXPORTS
  // ════════════════════════════════════════════════════════════════
  // Public surface consumed by bulk-import.jsx: the schemas plus every
  // pipeline stage, so the UI can run stages individually (re-mapping,
  // re-validation) or end-to-end via runPipeline/commit.
  window.BULK_IMPORT_ENGINE = {
    SCHEMAS,
    parse, sniffFormat, sniffDelimiter,
    detectEntity, suggestMapping, bestHeaderForField,
    validateRow, dedupResolve, runPipeline, commit,
    seedRunHistory,
  };
})();
