// ml-cluster.jsx — Catalog clustering & auto-tagging
// ────────────────────────────────────────────────────────────────────
// Embeds catalog into a low-dim space using audio features + metadata,
// runs k-means to discover clusters, auto-labels each cluster with:
//   - Mood (chill, hype, sad, anthemic, dark, romantic, ...)
//   - Genre cluster ("synthpop", "trap-soul", "reggaeton-fusion", ...)
//   - Use-case tags (sync-ready: cinematic / drama / comedy / commercial / sports)
//
// EXPORT: window.ClusterEngine
// ────────────────────────────────────────────────────────────────────
(function () {
  if (typeof window === 'undefined') return;

  /**
   * Build a deterministic pseudo-random generator from an arbitrary seed.
   *
   * The seed is stringified (falsy seeds become 'x'), folded into a 32-bit
   * hash over its UTF-16 code units, and that hash initialises four 32-bit
   * state words. The update step appears to follow the sfc32 scheme.
   *
   * @param {*} s - Seed value; coerced with String().
   * @returns {function(): number} Generator yielding floats in [0, 1).
   */
  function cseed(s) {
    const text = String(s || 'x');

    // Polynomial (base-31) hash over UTF-16 code units, kept in uint32 range.
    const hash = text
      .split('')
      .reduce((acc, unit) => (acc * 31 + unit.charCodeAt(0)) >>> 0, 0);

    // XOR always produces int32 values, so the state stays 32-bit throughout.
    let s0 = hash ^ 0x9e3779b9;
    let s1 = hash ^ 0xdeadbeef;
    let s2 = hash ^ 0x41c6ce57;
    let counter = hash ^ 0x6b79f5d3;

    return () => {
      const sum = (((s0 + s1) | 0) + counter) | 0;
      counter = (counter + 1) | 0;
      s0 = s1 ^ (s1 >>> 9);
      s1 = (s2 + (s2 << 3)) | 0;
      // Rotate s2 left by 21 bits, then mix in this step's output.
      const rotated = (s2 << 21) | (s2 >>> 11);
      s2 = (rotated + sum) | 0;
      return (sum >>> 0) / 4294967296;
    };
  }

  // ─── Build feature vector per recording ─────────────────────────
  // 8 dims, in order: energy, valence, danceability, acousticness,
  // instrumentalness, (bpm - 60) / 140, (loudness + 24) / 24, speechiness
  /**
   * Turn a recording into an 8-element numeric feature vector.
   *
   * Audio features come from `window.PredictEngine.getFeatures(rec).audio`
   * when that returns something truthy; otherwise they are synthesised from a
   * PRNG seeded by the recording's id (or title). Loudness and speechiness are
   * always synthesised from a second, separately seeded PRNG.
   *
   * @param {{id?: *, title?: string}} rec - Recording to embed.
   * @returns {number[]} Feature vector in the order listed above.
   */
  function vec(rec) {
    let f = window.PredictEngine?.getFeatures?.(rec)?.audio;
    if (!f) {
      const rng = cseed(rec.id || rec.title || 'x');
      f = {
        bpm: 85 + rng() * 70,
        energy: 0.45 + rng() * 0.5,
        valence: 0.2 + rng() * 0.7,
        danceability: 0.4 + rng() * 0.55,
        acousticness: rng() * 0.6,
        instrumentalness: rng() * 0.35,
      };
    }

    // Seed the extra features from the id when there is one (unchanged for
    // every record that has an id). Without an id, fall back to the title so
    // id-less records no longer all share the seed 'extra·undefined' and thus
    // identical loudness/speechiness.
    const extraKey = rec.id ?? (rec.title || 'x');
    const rng2 = cseed('extra·' + extraKey);
    const speechiness = rng2() * 0.5;
    const loudness = -16 + rng2() * 12; // dB, in [-16, -4)

    return [
      f.energy ?? 0.5,
      f.valence ?? 0.5,
      f.danceability ?? 0.5,
      f.acousticness ?? 0.3,
      f.instrumentalness ?? 0.1,
      ((f.bpm ?? 120) - 60) / 140,
      (loudness + 24) / 24,
      speechiness,
    ];
  }

  // ─── Distance metric ────────────────────────────────────────────
  /**
   * Euclidean (L2) distance between two numeric vectors.
   *
   * Iterates over the length of `a`; `b` is indexed at the same positions.
   *
   * @param {number[]} a - First vector.
   * @param {number[]} b - Second vector.
   * @returns {number} Square root of the summed squared differences.
   */
  function dist(a, b) {
    let sumOfSquares = 0;
    let axis = 0;
    while (axis < a.length) {
      sumOfSquares += (a[axis] - b[axis]) ** 2;
      axis += 1;
    }
    return Math.sqrt(sumOfSquares);
  }

  // ─── k-means (Lloyd's algorithm) ───────────────────────────────
  /**
   * Cluster points with k-means (Lloyd's algorithm).
   *
   * Initial centers are distinct input points chosen by a PRNG seeded from
   * `opts.seed`, so the same seed and input give the same result. When fewer
   * than `k` points exist, one center per point is used instead of `k`.
   * A cluster that ends an iteration with no members keeps its previous center.
   *
   * @param {number[][]} points - Vectors to cluster; all share the length of points[0].
   * @param {number} k - Requested number of clusters (must be >= 1 when points is non-empty).
   * @param {{maxIter?: number, seed?: string}} [opts] - maxIter defaults to 30, seed to 'kmeans'.
   * @returns {{centers: number[][], assignments: number[]}} Final centers and,
   *   per input point, the index of its center. Both are empty for empty input.
   * @throws {RangeError} If points is non-empty and k is not a number >= 1.
   */
  function kmeans(points, k, opts) {
    const options = opts || {};
    const maxIter = options.maxIter || 30;
    const seed = options.seed || 'kmeans';

    // Nothing to cluster: avoid dereferencing points[0].
    if (points.length === 0) return { centers: [], assignments: [] };
    if (!(k >= 1)) throw new RangeError('kmeans: k must be a number >= 1');

    const rng = cseed(seed);
    const dim = points[0].length;

    // Init: pick up to k random distinct points as starting centers.
    const centers = [];
    const used = new Set();
    while (centers.length < k && used.size < points.length) {
      const i = Math.floor(rng() * points.length);
      if (!used.has(i)) { used.add(i); centers.push(points[i].slice()); }
    }
    // May be smaller than k when there are fewer points than requested clusters.
    const clusterCount = centers.length;

    const assignments = new Array(points.length).fill(0);
    for (let iter = 0; iter < maxIter; iter++) {
      // The initial all-zero assignment is arbitrary, so the first pass always
      // counts as a change; otherwise a first pass that happens to put every
      // point in cluster 0 would stop before re-checking the moved centroid.
      let changed = iter === 0 ? 1 : 0;

      // Assign each point to its nearest center (lowest index wins ties).
      for (let i = 0; i < points.length; i++) {
        let best = 0;
        let bestDist = Infinity;
        for (let j = 0; j < clusterCount; j++) {
          const d = dist(points[i], centers[j]);
          if (d < bestDist) { bestDist = d; best = j; }
        }
        if (assignments[i] !== best) { changed += 1; assignments[i] = best; }
      }

      // Update each center to the mean of its members.
      const sums = Array.from({ length: clusterCount }, () => new Array(dim).fill(0));
      const counts = new Array(clusterCount).fill(0);
      for (let i = 0; i < points.length; i++) {
        const c = assignments[i];
        counts[c] += 1;
        for (let d = 0; d < dim; d++) sums[c][d] += points[i][d];
      }
      for (let j = 0; j < clusterCount; j++) {
        if (counts[j] > 0) {
          for (let d = 0; d < dim; d++) centers[j][d] = sums[j][d] / counts[j];
        }
      }

      if (!changed) break;
    }
    return { centers, assignments };
  }

  // ─── 2D projection (PCA-ish) for visualization ─────────────────
  // Cheap: project onto first 2 high-variance axes (energy vs valence is usually informative)
  /**
   * Map feature vectors to 2-D coordinates using fixed weights.
   *
   * x blends index 0 (energy) with index 5 (normalised tempo);
   * y blends index 1 (valence) with the complement of index 3 (acousticness).
   *
   * @param {number[][]} points - Feature vectors in the order produced by vec().
   * @returns {number[][]} One [x, y] pair per input vector.
   */
  function project2D(points) {
    const toXY = (features) => {
      const energy = features[0];
      const valence = features[1];
      const acousticness = features[3];
      const tempo = features[5];
      const x = energy * 0.6 + tempo * 0.4;
      const y = valence * 0.6 + (1 - acousticness) * 0.4;
      return [x, y];
    };
    return points.map((p) => toXY(p));
  }

  // ─── Auto-label cluster centroid ───────────────────────────────
  /**
   * Derive human-readable tags from a cluster centroid.
   *
   * Mood and genre are each chosen by the first matching rule in a priority
   * list (with a fallback label); sync use-cases are every rule that matches.
   *
   * @param {number[]} centroid - 8-dim vector: energy, valence, danceability,
   *   acousticness, instrumentalness, tempo, loudness, speechiness.
   * @returns {{mood: string, genre: string, useCases: string[], labels: string[]}}
   *   `labels` is `[mood, genre]`.
   */
  function labelCluster(centroid) {
    // Index 6 (loudness) is not used by any rule.
    const [energy, valence, dance, acoustic, instr, bpm, , speech] = centroid;

    // Return the label of the first rule whose condition holds.
    const firstMatch = (rules, fallback) => {
      const hit = rules.find(([matches]) => matches);
      return hit ? hit[1] : fallback;
    };

    // Mood — order is priority.
    const mood = firstMatch([
      [energy > 0.7 && valence > 0.6, 'Hype'],
      [energy > 0.65 && valence < 0.4, 'Dark'],
      [energy < 0.45 && valence > 0.55, 'Warm'],
      [energy < 0.4 && valence < 0.4, 'Sad'],
      [energy > 0.55 && valence > 0.5 && acoustic < 0.3, 'Anthemic'],
      [acoustic > 0.5, 'Intimate'],
      [dance > 0.7, 'Dancefloor'],
    ], 'Mellow');

    // Genre cluster — order is priority.
    const genre = firstMatch([
      [dance > 0.7 && energy > 0.65 && acoustic < 0.2, 'Electronic / EDM'],
      [speech > 0.3 && bpm > 0.4, 'Hip-Hop / Rap'],
      [acoustic > 0.55 && instr < 0.2, 'Singer-Songwriter'],
      [energy > 0.6 && acoustic > 0.3 && dance < 0.55, 'Indie Rock'],
      [dance > 0.6 && valence > 0.55 && bpm > 0.5, 'Pop'],
      [instr > 0.4, 'Instrumental / Score'],
      [acoustic > 0.6 && valence < 0.5, 'Folk / Americana'],
      [energy > 0.7 && bpm > 0.55, 'Rock / Alternative'],
    ], 'Pop / R&B');

    // Sync use-cases — independent rules, all matches are kept in this order.
    const useCases = [
      [instr > 0.35, 'Cinematic / Score'],
      [energy > 0.7 && valence > 0.55, 'Sports / Action'],
      [energy < 0.45 && valence < 0.45, 'Drama'],
      [energy < 0.5 && valence > 0.6 && acoustic > 0.3, 'Lifestyle / Commercial'],
      [dance > 0.7 && valence > 0.6, 'Party / YA'],
      [acoustic > 0.5 && valence < 0.4, 'Indie Film'],
      [energy > 0.55 && dance < 0.5 && instr > 0.2, 'Trailer'],
    ]
      .filter(([matches]) => matches)
      .map(([, tag]) => tag);

    return { mood, genre, useCases, labels: [mood, genre] };
  }

  // ─── Run full clustering on catalog ─────────────────────────────
  /**
   * Cluster a catalog of recordings and attach auto-generated tags.
   *
   * Recordings come from `opts.recordings`, or else from the first
   * `opts.limit` (default 400) entries of `window.__RECORDINGS` /
   * `window.RECORDINGS`. An empty catalog yields an empty result, and the
   * cluster count is capped at the number of recordings.
   *
   * @param {{recordings?: object[], limit?: number, k?: number}} [opts]
   *   `k` defaults to 8; values that are not numbers >= 1 also fall back to 8.
   * @returns {{clusters: object[], tagMap: Object<string, object>,
   *   assignments: number[], points: number[][], proj: number[][]}}
   *   `clusters[j].members` is sorted nearest-to-center first; `tagMap` is
   *   keyed by recording id.
   */
  function clusterCatalog(opts) {
    const options = opts || {};
    const recs = options.recordings
      || (window.__RECORDINGS || window.RECORDINGS || []).slice(0, options.limit || 400);

    // Empty catalog: nothing to embed or cluster.
    if (recs.length === 0) {
      return { clusters: [], tagMap: {}, assignments: [], points: [], proj: [] };
    }

    // Asking for more clusters than recordings cannot be satisfied, so cap k.
    const requestedK = Math.floor(Number(options.k));
    const k = Math.min(requestedK >= 1 ? requestedK : 8, recs.length);

    const points = recs.map(vec);
    const { centers, assignments } = kmeans(points, k, { seed: 'astro-catalog' });
    const proj = project2D(points);

    const clusters = [];
    // Iterate over the centers actually returned rather than the requested k.
    for (let j = 0; j < centers.length; j++) {
      const members = [];
      for (let i = 0; i < points.length; i++) {
        if (assignments[i] === j) members.push({ rec: recs[i], idx: i, d: dist(points[i], centers[j]) });
      }
      members.sort((a, b) => a.d - b.d);
      const meta = labelCluster(centers[j]);
      clusters.push({
        id: 'C' + j,
        center: centers[j],
        ...meta,
        size: members.length,
        members,
        examples: members.slice(0, 5).map(m => m.rec),
        proj2D: project2D([centers[j]])[0],
      });
    }

    // Per-recording labels, inherited from the recording's cluster.
    const tagMap = {};
    for (let i = 0; i < recs.length; i++) {
      const c = clusters[assignments[i]];
      tagMap[recs[i].id] = {
        cluster: c.id,
        mood: c.mood,
        genre: c.genre,
        useCases: c.useCases,
        x: proj[i][0],
        y: proj[i][1],
      };
    }

    return { clusters, tagMap, assignments, points, proj };
  }

  // ─── Find similar tracks (Euclidean distance on raw vector) ────
  /**
   * Rank other recordings by closeness to `rec` in feature space.
   *
   * Similarity is `1 - dist / 3`, where dist is the Euclidean distance between
   * the two feature vectors. Recordings whose id equals `rec.id` are excluded.
   *
   * @param {object} rec - Reference recording.
   * @param {{recordings?: object[], n?: number}} [opts] - Candidate pool
   *   (defaults to the first 400 of window.__RECORDINGS / window.RECORDINGS)
   *   and result count (defaults to 12).
   * @returns {{rec: object, sim: number}[]} Best matches, most similar first.
   */
  function similar(rec, opts) {
    const options = opts || {};
    const target = vec(rec);
    const pool = options.recordings || (window.__RECORDINGS || window.RECORDINGS || []).slice(0, 400);

    const scored = [];
    for (const candidate of pool) {
      if (candidate.id === rec.id) continue;
      const distance = dist(target, vec(candidate));
      scored.push({ rec: candidate, sim: 1 - distance / 3 });
    }

    scored.sort((left, right) => right.sim - left.sim);
    return scored.slice(0, options.n || 12);
  }

  // Publish the engine's functions as a single namespace object on `window`
  // so that other scripts on the page can call them.
  window.ClusterEngine = { clusterCatalog, similar, vec, project2D, labelCluster, kmeans, dist };
  // One-time console message emitted when this script has finished evaluating.
  console.log('[ClusterEngine] loaded · k-means + auto-tag');
})();
