added blockbench uv packer weights, js plugin and ml and training guidelines

2 months ago · 2094497543
commit 2094497543
10 changed files with 2173 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,15 @@
 # Generated training data — regenerate from your bbmodel corpus with extract_pairs.mjs
 ml/pairs.csv
 # Python
 __pycache__/
 *.py[cod]
 .venv/
 venv/
 # Node
 node_modules/
 # OS
 .DS_Store
 Thumbs.db
--- a/README.md
+++ b/README.md
@ -0,0 +1,78 @@
 # Blockbench UV Packer
 A Blockbench plugin that auto-packs per-face UV models with ML-assisted share-merge suggestions, mirror detection, multi-variant texture support, and pow2 canvas cropping. One button, end-to-end.
 Originally built for the Decocraft project's bbmodel corpus (~600 models). The ML model in `ml/share_model.json` is trained on that corpus; you can retrain on your own corpus with the scripts in `ml/`.
 ## What it does
 When you click **Tools → Pack UVs (Decocraft)**, the plugin:
 1. Collects all per-face-UV cubes in the project (skips box-UV cubes; works in mixed-mode projects)
 2. **Heuristic share-merge** — faces with the same UV rect (modulo flip) collapse to one packed slot. Median 80% rect dedupe across the training corpus.
 3. **ML share-merge suggestions** (if `share_model.json` is loaded) — surfaces candidate merges the heuristic missed. A pixel-similarity gate filters out false positives. Each suggestion shows a thumbnail preview before you accept.
 4. **Mirror-orientation detection** — for cross-rect merges, the planner samples each member's pixels in 4 orientations and picks the one that minimizes RGBA diff against the canonical, so mirror-painted symmetric regions stay mirror-correct after merging.
 5. **Skyline pack** — biggest-first, top-to-bottom, left-to-right. Outliner-group ordering clusters same-group faces. Auto-expands the working canvas if rects don't fit.
 6. **Variant texture rearrangement** — alternate skin textures (any texture in the project with no faces pointing to it) get the same pixel rearrangement as the driver texture, so swapping skins still lines up.
 7. **Pow2 crop** — final texture canvas is shrunk to next-power-of-2 of the actual packed bbox.
 8. Single Undo entry covers everything.
 ## Install
 1. In Blockbench: **File → Plugins → Load Plugin from File**
 2. Pick `decocraft_uv_packer.js`
 3. (One-time) **Tools → Load Share Model JSON** → pick both `ml/share_model.json` and `ml/inference_test_cases.json` (multi-select). The model is cached to `localStorage` after first load. The plugin self-tests the JS LightGBM evaluator against the test cases on every load — if it doesn't match LightGBM exactly, it refuses to use the model.
 ## Workflow
 - Open a project that uses per-face UVs
 - **Tools → Pack UVs (Decocraft)**
 - If the ML model finds candidates, a dialog opens with thumbnails. Uncheck any pair where the previews disagree visually. Click **Accept & Pack** (or **Pack without merging** to skip ML suggestions).
 - The plugin packs, crops to pow2, and rearranges variants. Toast reports `Packed N faces into M slots · canvas A×B → C×D`.
 ## Retraining the ML model on your own corpus
 If you have your own bbmodel collection and want a model trained on its conventions:
 ```sh
 # 1. Inspect the corpus statistics (optional)
 node analyze_uvs.mjs path/to/your/bbmodel/folder
 # 2. Extract face-pair training data (~250k labeled pairs from ~600 models)
 node ml/extract_pairs.mjs path/to/your/bbmodel/folder
 # → produces ml/pairs.csv (~30 MB on a 600-model corpus)
 # 3. Train (Python: pip install lightgbm pandas scikit-learn)
 cd ml
 python train_share_classifier.py
 # → updates share_model.json, eval_report.txt, inference_test_cases.json
 # 4. In Blockbench: Tools → Load Share Model JSON → pick the new files
 ```
 The training script splits **by model** (not by pair) for held-out evaluation — so the precision/recall numbers in `eval_report.txt` reflect actual generalization to unseen models, not just unseen pairs from already-seen models. The bundled `share_model.json` was trained on the Decocraft corpus and reaches AUC 0.985 / precision 0.96 @ recall 0.88 on 123 held-out models.
 ## File layout
 ```
 blockbench-uv-packer/
 ├── decocraft_uv_packer.js        Blockbench plugin (single file)
 ├── analyze_uvs.mjs               Corpus stats (per-model density, group overlap, share fraction…)
 └── ml/
    ├── extract_pairs.mjs         Build pairs.csv from a bbmodel directory
    ├── train_share_classifier.py LightGBM trainer + eval + JSON export
    ├── validate_js_eval.mjs      Verifies the JS evaluator matches LightGBM bit-for-bit
    ├── share_model.json          Trained model (350 trees, ~2.5 MB)
    ├── inference_test_cases.json 50 self-test cases for the plugin
    └── eval_report.txt           Precision/recall sweep + feature importance from last train
 ```
 ## Limitations / non-goals
 - **Per-face UV only.** Cubes set to box-UV mode are skipped (counted in the success toast).
 - **Same-dimension share-merges only.** The merger collapses faces whose source rects have the same dims (with optional 90°-mirror); it doesn't rotate-to-fit different aspect ratios.
 - **Reorient-cubes feature was attempted and dropped.** Auto-rotating cubes for tighter packing either fights against Blockbench's `cube.roll()` (which rotates visually, breaking model assemblies) or requires deriving the full per-(perm × slot) D4-symmetry texture transform (which kept hitting subtle bugs across iterations). If you want a cube reoriented, use Blockbench's built-in Transform → Rotate tools manually before running Pack UVs.
 ## License
 TBD — add the license your project uses.
--- a/analyze_uvs.mjs
+++ b/analyze_uvs.mjs
@ -0,0 +1,300 @@
 // One-off analyzer: scan every .bbmodel in a directory and report UV-layout
 // statistics that characterize the project's sorting/packing conventions.
 //
 // Usage:
 //   node analyze_uvs.mjs <path/to/bbmodel/directory>
 //   MODELS_DIR=path/to/dir node analyze_uvs.mjs
 import fs from 'node:fs';
 import path from 'node:path';
 // Corpus location: first CLI argument, else the MODELS_DIR env var, else ./bbmodel.
 const requestedModelsDir = process.argv[2] || process.env.MODELS_DIR || 'bbmodel';
 const MODELS_DIR = path.resolve(requestedModelsDir);
 const modelsDirExists = fs.existsSync(MODELS_DIR);
 if (modelsDirExists === false) {
  // Nothing to analyze: explain how to point the script at a corpus, then exit non-zero.
  console.error(`Models dir not found: ${MODELS_DIR}`);
  console.error('Pass it as an argument or set MODELS_DIR.');
  process.exit(1);
 }
 // Face keys looked up on each cube's `faces` object.
 const FACE_NAMES = ['north', 'east', 'south', 'west', 'up', 'down'];
 /**
  * Normalize a face UV `[x1, y1, x2, y2]` into a top-left rect plus flip flags.
  *
  * @param {number[]} uv - Two UV corners; the second may lie left of / above the first.
  * @returns {{x: number, y: number, w: number, h: number, flipX: boolean, flipY: boolean}}
  */
 function rectOf(uv) {
  const [startU, startV, endU, endV] = uv;
  const x = Math.min(startU, endU);
  const y = Math.min(startV, endV);
  const w = Math.abs(endU - startU);
  const h = Math.abs(endV - startV);
  // A face is flipped on an axis when its end corner precedes its start corner.
  const flipX = endU < startU;
  const flipY = endV < startV;
  return { x, y, w, h, flipX, flipY };
 }
 /** Area of a rect, from its `w` and `h` fields. */
 function rectArea(r) {
  const { w, h } = r;
  return w * h;
 }
 /**
  * Axis-aligned bounding box enclosing every rect in `rects`.
  *
  * @param {Array<{x: number, y: number, w: number, h: number}>} rects
  * @returns {{x: number, y: number, w: number, h: number}} All zeros for an empty list.
  */
 function bboxOf(rects) {
  if (rects.length === 0) return { x: 0, y: 0, w: 0, h: 0 };
  const extent = rects.reduce(
    (acc, rect) => {
      const right = rect.x + rect.w;
      const bottom = rect.y + rect.h;
      return {
        left: rect.x < acc.left ? rect.x : acc.left,
        top: rect.y < acc.top ? rect.y : acc.top,
        right: right > acc.right ? right : acc.right,
        bottom: bottom > acc.bottom ? bottom : acc.bottom,
      };
    },
    { left: Infinity, top: Infinity, right: -Infinity, bottom: -Infinity }
  );
  return {
    x: extent.left,
    y: extent.top,
    w: extent.right - extent.left,
    h: extent.bottom - extent.top,
  };
 }
 /**
  * Walk the outliner tree and record, for every cube uuid, the name of the
  * group that directly contains it.
  *
  * @param {Array<string|object>} outlinerNodes - String entries are cube uuids; objects with a `children` array are groups.
  * @param {string|null} parentName - Name of the enclosing group; falsy at the top level.
  * @param {Map<string, string>} out - Filled in place: cube uuid -> group name ('__root__' when ungrouped).
  * @param {number} [depth=0] - Nesting level, used only to label unnamed groups.
  */
 function buildCubeGroupMap(outlinerNodes, parentName, out, depth = 0) {
  for (const entry of outlinerNodes) {
    if (typeof entry === 'string') {
      // Leaf: a cube uuid belonging to the current group.
      out.set(entry, parentName || '__root__');
      continue;
    }
    const isGroup = Boolean(entry) && Array.isArray(entry.children);
    if (!isGroup) continue;
    const label = entry.name || `group@${depth}`;
    buildCubeGroupMap(entry.children, label, out, depth + 1);
  }
 }
 /**
  * True when two rects share interior area; rects that merely touch along an
  * edge or corner do not count as overlapping.
  */
 function rectsOverlap(a, b) {
  const overlapsOnX = a.x + a.w > b.x && b.x + b.w > a.x;
  const overlapsOnY = a.y + a.h > b.y && b.y + b.h > a.y;
  return overlapsOnX && overlapsOnY;
 }
 /** True when both rects have strictly equal `x`, `y`, `w` and `h`. */
 function rectsIdentical(a, b) {
  return ['x', 'y', 'w', 'h'].every((key) => a[key] === b[key]);
 }
 /**
  * Compute UV-layout statistics for a single .bbmodel file.
  *
  * @param {string} filePath - Path to a .bbmodel JSON file.
  * @returns {object} `{ skipped: 'box_uv' }` when `meta.box_uv` is true,
  *   `{ skipped: 'no_faces' }` when no cube face has a non-degenerate UV rect,
  *   otherwise a stats record: canvas size, cube/group/face counts, shared and
  *   flipped fractions, global bbox fill and density, per-group stats, group
  *   bbox overlap ratio, biggest-first score and zero-padding fraction.
  * @throws If the file cannot be read or does not contain valid JSON.
  */
 function analyzeModel(filePath) {
  const data = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  if (data.meta?.box_uv === true) return { skipped: 'box_uv' };
  const uvW = data.resolution?.width ?? 16;
  const uvH = data.resolution?.height ?? 16;
  const elements = data.elements || [];
  const cubeGroup = new Map();
  buildCubeGroupMap(data.outliner || [], null, cubeGroup);
  // Per-cube faces
  const allRects = [];
  const groupRects = new Map(); // groupName -> [rects]
  let cubeCount = 0;
  for (const el of elements) {
    // Elements without a `type` are treated as cubes; other element types are ignored.
    if (el.type && el.type !== 'cube') continue;
    cubeCount++;
    if (!el.faces) continue;
    const groupName = cubeGroup.get(el.uuid) || '__root__';
    if (!groupRects.has(groupName)) groupRects.set(groupName, []);
    for (const fname of FACE_NAMES) {
      const face = el.faces[fname];
      if (!face || !face.uv) continue;
      const r = rectOf(face.uv);
      // Zero-area rects carry no texture and would distort the density figures.
      if (r.w === 0 || r.h === 0) continue;
      allRects.push({ ...r, cube: el.name, group: groupName, face: fname });
      groupRects.get(groupName).push({ ...r, cube: el.name, face: fname });
    }
  }
  if (!allRects.length) return { skipped: 'no_faces' };
  // Global bbox
  const globalBbox = bboxOf(allRects);
  const usedArea = allRects.reduce((s, r) => s + rectArea(r), 0);
  const bboxArea = globalBbox.w * globalBbox.h;
  const canvasArea = uvW * uvH;
  // Identical-rect dedupe — count how many distinct rect positions+sizes
  const uniqRects = new Set(allRects.map(r => `${r.x},${r.y},${r.w},${r.h}`));
  const sharedRectFraction = 1 - uniqRects.size / allRects.length;
  // Flip usage
  const flippedFraction =
    allRects.filter(r => r.flipX || r.flipY).length / allRects.length;
  // Group clustering: for each group, how tight is its bbox vs. group total area?
  // The bbox of each group is computed once and reused for the overlap check below.
  const groupStats = [];
  const gBboxes = [];
  for (const [name, rects] of groupRects.entries()) {
    if (!rects.length) continue;
    const bb = bboxOf(rects);
    const a = rects.reduce((s, r) => s + rectArea(r), 0);
    const bbA = bb.w * bb.h;
    groupStats.push({
      name,
      faces: rects.length,
      totalArea: a,
      bboxW: bb.w,
      bboxH: bb.h,
      bboxArea: bbA,
      density: bbA > 0 ? a / bbA : 0,
    });
    gBboxes.push({ name, ...bb });
  }
  // Group separation score: what fraction of group pairs have overlapping bboxes?
  let groupBboxOverlapPairs = 0;
  for (let i = 0; i < gBboxes.length; i++) {
    for (let j = i + 1; j < gBboxes.length; j++) {
      if (rectsOverlap(gBboxes[i], gBboxes[j])) groupBboxOverlapPairs++;
    }
  }
  const totalGroupPairs = (gBboxes.length * (gBboxes.length - 1)) / 2;
  const groupOverlapRatio = totalGroupPairs ? groupBboxOverlapPairs / totalGroupPairs : 0;
  // Sort within groups: do faces appear biggest-first in the outliner / file order?
  // Approximate: take group rects in file order, see how often successor area <= prev area.
  let monotoneDecreaseHits = 0;
  let monotoneDecreaseTotal = 0;
  for (const rects of groupRects.values()) {
    for (let i = 1; i < rects.length; i++) {
      monotoneDecreaseTotal++;
      if (rectArea(rects[i]) <= rectArea(rects[i - 1])) monotoneDecreaseHits++;
    }
  }
  const biggestFirstScore = monotoneDecreaseTotal
    ? monotoneDecreaseHits / monotoneDecreaseTotal
    : 0;
  // Padding: horizontal gap between rects that overlap vertically.
  // Approximate by sampling: rects are ordered by y (then x), and for each rect
  // only later rects starting within 4 units below its bottom edge are examined.
  const sortedByYX = [...allRects].sort((a, b) => a.y - b.y || a.x - b.x);
  let zeroGapCount = 0;
  let totalGapsMeasured = 0;
  for (let i = 0; i < sortedByYX.length; i++) {
    for (let j = i + 1; j < sortedByYX.length; j++) {
      const a = sortedByYX[i], b = sortedByYX[j];
      // Sorted by y, so every later rect starts even lower: stop scanning for `a`.
      if (b.y > a.y + a.h + 4) break;
      // Right-edge to left-edge gap when y-overlap exists
      const yOverlap = Math.min(a.y + a.h, b.y + b.h) - Math.max(a.y, b.y);
      if (yOverlap > 0) {
        const gap = b.x - (a.x + a.w);
        if (gap >= 0 && gap <= 4) {
          totalGapsMeasured++;
          if (gap === 0) zeroGapCount++;
        }
      }
    }
  }
  const zeroPaddingFraction = totalGapsMeasured ? zeroGapCount / totalGapsMeasured : 0;
  return {
    name: path.basename(filePath, '.bbmodel'),
    canvas: { uvW, uvH },
    cubes: cubeCount,
    groups: gBboxes.length,
    faces: allRects.length,
    uniqueRects: uniqRects.size,
    sharedRectFraction: round(sharedRectFraction, 3),
    flippedFraction: round(flippedFraction, 3),
    globalBbox,
    bboxFillOfCanvas: round(bboxArea / canvasArea, 3),
    densityInBbox: round(usedArea / Math.max(bboxArea, 1), 3),
    groupStats: groupStats.map(g => ({ ...g, density: round(g.density, 3) })),
    groupOverlapRatio: round(groupOverlapRatio, 3),
    biggestFirstScore: round(biggestFirstScore, 3),
    zeroPaddingFraction: round(zeroPaddingFraction, 3),
    paddedSamples: totalGapsMeasured,
  };
 }
 /** Round `n` to `d` decimal places (half rounds toward +Infinity, per Math.round). */
 function round(n, d) {
  const scale = 10 ** d;
  const scaled = Math.round(n * scale);
  return scaled / scale;
 }
 // Every .bbmodel file directly inside MODELS_DIR (not recursive).
 const files = fs
  .readdirSync(MODELS_DIR)
  .filter(f => f.endsWith('.bbmodel'))
  .map(f => path.join(MODELS_DIR, f));
 const results = [];
 // Tally of models left out of the statistics, keyed by reason.
 const skipped = { box_uv: 0, no_faces: 0, error: 0 };
 for (const f of files) {
  try {
    const r = analyzeModel(f);
    if (r.skipped) {
      skipped[r.skipped]++;
      continue;
    }
    results.push(r);
  } catch (e) {
    // Best-effort scan: keep going, but report which file failed and why.
    // stderr keeps the JSON summary on stdout clean.
    skipped.error++;
    console.error(`Skipping ${path.basename(f)}: ${e?.message ?? e}`);
  }
 }
 // Aggregate
 /** Arithmetic mean of `xs`; 0 for an empty array. */
 function avg(xs) {
  if (xs.length === 0) return 0;
  let total = 0;
  for (const value of xs) {
    total += value;
  }
  return total / xs.length;
 }
 /**
  * Median of `xs`; 0 for an empty array.
  *
  * For an even number of values this is the mean of the two middle values.
  * The input array is not modified.
  *
  * @param {number[]} xs
  * @returns {number}
  */
 function median(xs) {
  if (!xs.length) return 0;
  const s = [...xs].sort((a, b) => a - b);
  const mid = Math.floor(s.length / 2);
  return s.length % 2 === 1 ? s[mid] : (s[mid - 1] + s[mid]) / 2;
 }
 /**
  * Value at fraction `p` (0..1) of the ascending-sorted input, picking the
  * element at index floor((n - 1) * p) without interpolation; 0 when empty.
  */
 function pctile(xs, p) {
  const count = xs.length;
  if (count === 0) return 0;
  const ascending = Array.from(xs).sort((lo, hi) => lo - hi);
  const rank = Math.floor((count - 1) * p);
  return ascending[rank];
 }
 // Per-metric helpers: pull one field out of every result, then aggregate and round.
 const column = (key) => results.map((r) => r[key]);
 const meanOf = (key) => round(avg(column(key)), 1);
 const medianOf = (key) => round(median(column(key)), 3);
 const pctileOf = (key, p) => round(pctile(column(key), p), 3);
 const summary = {
  totalAnalyzed: results.length,
  skipped,
  avgCubes: meanOf('cubes'),
  avgGroups: meanOf('groups'),
  avgFaces: meanOf('faces'),
  medianBboxFillOfCanvas: medianOf('bboxFillOfCanvas'),
  medianDensityInBbox: medianOf('densityInBbox'),
  medianSharedRectFraction: medianOf('sharedRectFraction'),
  medianFlippedFraction: medianOf('flippedFraction'),
  medianGroupOverlapRatio: medianOf('groupOverlapRatio'),
  medianBiggestFirstScore: medianOf('biggestFirstScore'),
  medianZeroPadFraction: medianOf('zeroPaddingFraction'),
  // Distribution-of-density
  densityP10: pctileOf('densityInBbox', 0.1),
  densityP50: pctileOf('densityInBbox', 0.5),
  densityP90: pctileOf('densityInBbox', 0.9),
  groupOverlapP10: pctileOf('groupOverlapRatio', 0.1),
  groupOverlapP50: pctileOf('groupOverlapRatio', 0.5),
  groupOverlapP90: pctileOf('groupOverlapRatio', 0.9),
  biggestFirstP10: pctileOf('biggestFirstScore', 0.1),
  biggestFirstP50: pctileOf('biggestFirstScore', 0.5),
  biggestFirstP90: pctileOf('biggestFirstScore', 0.9),
 };
 console.log('=== AGGREGATE ===');
 const summaryJson = JSON.stringify(summary, null, 2);
 console.log(summaryJson);
 // Show up to 5 sample model breakdowns of varying complexity: the models at
 // the 10/30/50/70/90% positions when ordered by face count.
 const samples = [...results].sort((a, b) => a.faces - b.faces);
 // On a small corpus several positions land on the same model; the Set keeps
 // each model once (first occurrence) so no breakdown is printed twice.
 const picks = [
  ...new Set(
    [0.1, 0.3, 0.5, 0.7, 0.9].map(q => samples[Math.floor(samples.length * q)])
  ),
 ].filter(Boolean);
 console.log('\n=== SAMPLE BREAKDOWNS (low → high complexity) ===');
 for (const r of picks) {
  console.log(`\n--- ${r.name} ---`);
  console.log(`  canvas: ${r.canvas.uvW}x${r.canvas.uvH}, cubes: ${r.cubes}, groups: ${r.groups}, faces: ${r.faces}`);
  console.log(`  global UV bbox: ${r.globalBbox.w}x${r.globalBbox.h} @ (${r.globalBbox.x},${r.globalBbox.y})  fill=${r.bboxFillOfCanvas}  density=${r.densityInBbox}`);
  console.log(`  shared-rect fraction: ${r.sharedRectFraction}, flipped: ${r.flippedFraction}, zero-padding gaps: ${r.zeroPaddingFraction} (n=${r.paddedSamples})`);
  console.log(`  group-bbox overlap ratio: ${r.groupOverlapRatio}  biggest-first score: ${r.biggestFirstScore}`);
  // Per-group detail is only printed for models with few groups, to keep output readable.
  if (r.groupStats.length <= 8) {
    for (const g of r.groupStats) {
      console.log(`    group "${g.name}": ${g.faces} faces, area=${g.totalArea}, bbox=${g.bboxW}x${g.bboxH}, density=${g.density}`);
    }
  }
 }
--- a/decocraft_uv_packer.js
+++ b/decocraft_uv_packer.js
--- a/ml/eval_report.txt
+++ b/ml/eval_report.txt
@ -0,0 +1,35 @@
 Held-out models: 123
 Held-out pairs:  41,713
 AUC:             0.9847
 Average prec:    0.9809
 Threshold sweep (held-out pairs):
  thresh      prec    recall        f1     kept%
    0.50    0.9250    0.9755    0.9496    52.77%
    0.70    0.9423    0.9490    0.9456    50.40%
    0.80    0.9550    0.9130    0.9335    47.84%
    0.85    0.9614    0.8828    0.9204    45.95%
    0.90    0.9681    0.8211    0.8886    42.44%
    0.95    0.9760    0.6656    0.7915    34.13%
 Top features by gain:
              area_ratio          984480
             aspect_diff          194900
              abs_w_diff           84968
              abs_h_diff           49857
               cube_dist           37537
                 a_cubeH           28154
                 a_cubeW           26844
                area_min           24794
             swap_w_diff           22231
             cube_w_diff           21646
             cube_h_diff           20308
                 a_cubeD           20095
             same_parent           18643
                 b_cubeD           18257
             cube_d_diff           16869
                 b_cubeH           16518
         direction_match           13923
                 b_cubeW           13617
                   a_uvH           12096
                   a_uvW           11763
--- a/ml/extract_pairs.mjs
+++ b/ml/extract_pairs.mjs
@ -0,0 +1,309 @@
 // Phase 1: extract face-pair training data for the share-prediction classifier.
 //
 // Usage:
 //   node extract_pairs.mjs <path/to/bbmodel/directory>
 //   MODELS_DIR=path/to/dir node extract_pairs.mjs
 // Output: pairs.csv next to this script (~250k rows on a 600-model corpus)
 //
 // Label: 1 if the two faces share a normalized UV rect within the same texture
 // in the source bbmodel, 0 otherwise. Negatives are HARD negatives — sampled
 // from the same model + biased toward the same parent or similar dimensions —
 // so the classifier learns "looks shareable but artist chose not to," not the
 // trivial "obviously different sizes."
 import fs from 'node:fs';
 import path from 'node:path';
 import { fileURLToPath } from 'node:url';
 // Directory containing this script; the default corpus and the output CSV are located relative to it.
 const scriptPath = fileURLToPath(import.meta.url);
 const HERE = path.dirname(scriptPath);
 // Corpus location: first CLI argument, else MODELS_DIR env var, else ../bbmodel next to this folder.
 const defaultModelsDir = path.join(HERE, '..', 'bbmodel');
 const ROOT = path.resolve(process.argv[2] || process.env.MODELS_DIR || defaultModelsDir);
 const OUT = path.join(HERE, 'pairs.csv');
 const rootExists = fs.existsSync(ROOT);
 if (rootExists === false) {
  console.error(`Models dir not found: ${ROOT}`);
  console.error('Pass it as an argument or set MODELS_DIR.');
  process.exit(1);
 }
 // Face keys looked up on each cube's `faces` object; the array position is the numeric code in DIR_IDX.
 const FACE_NAMES = ['north', 'east', 'south', 'west', 'up', 'down'];
 // Face name -> index in FACE_NAMES.
 const DIR_IDX = {};
 FACE_NAMES.forEach((faceName, position) => {
  DIR_IDX[faceName] = position;
 });
 // Face name -> the face on the other side of the cube, and face name -> axis letter.
 const OPPOSITE = {};
 const AXIS = {};
 for (const [axis, [first, second]] of [
  ['z', ['north', 'south']],
  ['x', ['east', 'west']],
  ['y', ['up', 'down']],
 ]) {
  OPPOSITE[first] = second;
  OPPOSITE[second] = first;
  AXIS[first] = axis;
  AXIS[second] = axis;
 }
 // Cap on positive pairs kept per model.
 const TARGET_POSITIVES_PER_MODEL = 200;
 const NEG_RATIO = 1.0;     // negatives per positive
 // Fixed seed for the generator below.
 const RNG_SEED = 42;
 let rngState = RNG_SEED;
 /**
  * Seeded linear congruential generator: advances the 32-bit `rngState` and
  * returns it scaled into [0, 1).
  */
 function rand() {
  const advanced = (rngState * 1664525 + 1013904223) >>> 0;
  rngState = advanced;
  return advanced / 0x100000000;
 }
 /**
  * Fisher–Yates shuffle driven by `rand()`.
  * Reorders `arr` in place and returns the same array.
  */
 function shuffle(arr) {
  let last = arr.length - 1;
  while (last > 0) {
    const pick = Math.floor(rand() * (last + 1));
    const held = arr[last];
    arr[last] = arr[pick];
    arr[pick] = held;
    last -= 1;
  }
  return arr;
 }
 /**
  * Walk the outliner tree and record each cube's enclosing group id and depth.
  *
  * @param {Array<string|object>} nodes - String entries are cube uuids; objects with a `children` array are groups.
  * @param {string|null} parentId - Id of the group that contains `nodes`.
  * @param {number} parentDepth - Depth stored for cubes found directly in `nodes`.
  * @param {Map<string, {parentId: (string|null), depth: number}>} out - Filled in place.
  * @param {{n: number}} groupSeq - Shared counter used to name groups that have no uuid.
  */
 function buildCubeGroupMap(nodes, parentId, parentDepth, out, groupSeq) {
  for (const entry of nodes) {
    if (typeof entry === 'string') {
      out.set(entry, { parentId, depth: parentDepth });
      continue;
    }
    if (!entry || !Array.isArray(entry.children)) continue;
    let groupId = entry.uuid;
    if (!groupId) {
      // Groups without a uuid get a sequential synthetic id.
      groupId = `group_${groupSeq.n}`;
      groupSeq.n += 1;
    }
    buildCubeGroupMap(entry.children, groupId, parentDepth + 1, out, groupSeq);
  }
 }
 /**
  * Read one .bbmodel file and return a flat list of face records.
  *
  * A face is kept only when it has a `uv`, a texture reference that is not
  * null/undefined/false, and a non-zero UV width and height.
  *
  * @param {string} filePath - Path to a .bbmodel JSON file.
  * @returns {Array<object>|null} Face records, or null when `meta.box_uv` is true.
  */
 function extractFacesFromModel(filePath) {
  const model = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  if (model.meta?.box_uv === true) return null;
  const groupByCube = new Map();
  buildCubeGroupMap(model.outliner || [], null, 0, groupByCube, { n: 0 });
  const collected = [];
  for (const el of model.elements || []) {
    // Elements without a `type` are treated as cubes.
    const isCube = !el.type || el.type === 'cube';
    if (!isCube || !el.faces) continue;
    const lo = el.from || [0, 0, 0];
    const hi = el.to || [0, 0, 0];
    const rot = el.rotation || [0, 0, 0];
    const placement = groupByCube.get(el.uuid) || { parentId: '__root__', depth: 0 };
    // Fields shared by every face of this cube: identity, size, center, rotation.
    const cubeFields = {
      cube: el.uuid,
      cubeName: el.name || '',
      parentId: placement.parentId,
      depth: placement.depth,
      cubeW: Math.abs(hi[0] - lo[0]),
      cubeH: Math.abs(hi[1] - lo[1]),
      cubeD: Math.abs(hi[2] - lo[2]),
      cx: (lo[0] + hi[0]) / 2,
      cy: (lo[1] + hi[1]) / 2,
      cz: (lo[2] + hi[2]) / 2,
      rotX: rot[0],
      rotY: rot[1],
      rotZ: rot[2],
      hasRot: rot[0] === 0 && rot[1] === 0 && rot[2] === 0 ? 0 : 1,
    };
    for (const dir of FACE_NAMES) {
      const face = el.faces[dir];
      if (!face || !face.uv) continue;
      const tex = face.texture;
      if (tex == null || tex === false) continue;
      const [x1, y1, x2, y2] = face.uv;
      const uvW = Math.abs(x2 - x1);
      const uvH = Math.abs(y2 - y1);
      if (uvW === 0 || uvH === 0) continue;
      const texKey = String(tex);
      // Share key: texture plus normalized (flip-independent) rect, fixed to 4 decimals.
      const rectKey = [Math.min(x1, x2), Math.min(y1, y2), uvW, uvH]
        .map((value) => value.toFixed(4))
        .join(',');
      collected.push({
        ...cubeFields,
        dir,
        uvW,
        uvH,
        flipX: x2 < x1 ? 1 : 0,
        flipY: y2 < y1 ? 1 : 0,
        texKey,
        shareKey: `${texKey}|${rectKey}`,
      });
    }
  }
  return collected;
 }
 /**
  * Build the feature record for a pair of face records.
  *
  * @param {object} a - First face record.
  * @param {object} b - Second face record.
  * @returns {object} One property per feature; `a_axis` / `b_axis` are axis
  *   letters, every other value is numeric.
  */
 function pairFeatures(a, b) {
  const flag = (condition) => (condition ? 1 : 0);
  const areaA = a.uvW * a.uvH;
  const areaB = b.uvW * b.uvH;
  const smallerArea = Math.min(areaA, areaB);
  const largerArea = Math.max(areaA, areaB);
  const aspectA = a.uvW / a.uvH;
  const aspectB = b.uvW / b.uvH;
  const axisA = AXIS[a.dir];
  const axisB = AXIS[b.dir];
  // Offset between the two cube centers.
  const offset = [a.cx - b.cx, a.cy - b.cy, a.cz - b.cz];
  const [dx, dy, dz] = offset;
  return {
    a_dir: DIR_IDX[a.dir],
    b_dir: DIR_IDX[b.dir],
    a_uvW: a.uvW,
    a_uvH: a.uvH,
    b_uvW: b.uvW,
    b_uvH: b.uvH,
    abs_w_diff: Math.abs(a.uvW - b.uvW),
    abs_h_diff: Math.abs(a.uvH - b.uvH),
    // Width-vs-height differences: small when the shapes match after a 90° rotation.
    swap_w_diff: Math.abs(a.uvW - b.uvH),
    swap_h_diff: Math.abs(a.uvH - b.uvW),
    area_min: smallerArea,
    area_max: largerArea,
    area_ratio: smallerArea / largerArea,
    aspect_diff: Math.abs(aspectA - aspectB),
    a_cubeW: a.cubeW,
    a_cubeH: a.cubeH,
    a_cubeD: a.cubeD,
    b_cubeW: b.cubeW,
    b_cubeH: b.cubeH,
    b_cubeD: b.cubeD,
    cube_dim_match: flag(a.cubeW === b.cubeW && a.cubeH === b.cubeH && a.cubeD === b.cubeD),
    cube_w_diff: Math.abs(a.cubeW - b.cubeW),
    cube_h_diff: Math.abs(a.cubeH - b.cubeH),
    cube_d_diff: Math.abs(a.cubeD - b.cubeD),
    same_cube: flag(a.cube === b.cube),
    same_parent: flag(a.parentId === b.parentId),
    direction_match: flag(a.dir === b.dir),
    direction_opposite: flag(OPPOSITE[a.dir] === b.dir),
    same_axis: flag(axisA === axisB),
    a_axis: axisA, // categorical
    b_axis: axisB,
    flip_match: flag(a.flipX === b.flipX && a.flipY === b.flipY),
    has_rot_either: flag(a.hasRot || b.hasRot),
    rot_match: flag(a.rotX === b.rotX && a.rotY === b.rotY && a.rotZ === b.rotZ),
    cube_dist: Math.sqrt(dx * dx + dy * dy + dz * dz),
    same_texture: flag(a.texKey === b.texKey),
    depth_diff: Math.abs(a.depth - b.depth),
  };
 }
 // Column order of the feature cells in every CSV row (after `model` and `label`).
 const FEATURE_KEYS = [
  'a_dir', 'b_dir',
  'a_uvW', 'a_uvH', 'b_uvW', 'b_uvH',
  'abs_w_diff', 'abs_h_diff', 'swap_w_diff', 'swap_h_diff',
  'area_min', 'area_max', 'area_ratio', 'aspect_diff',
  'a_cubeW', 'a_cubeH', 'a_cubeD', 'b_cubeW', 'b_cubeH', 'b_cubeD',
  'cube_dim_match', 'cube_w_diff', 'cube_h_diff', 'cube_d_diff',
  'same_cube', 'same_parent',
  'direction_match', 'direction_opposite', 'same_axis', 'a_axis', 'b_axis',
  'flip_match', 'has_rot_either', 'rot_match',
  'cube_dist', 'same_texture', 'depth_diff',
 ];
 // Numeric codes written to the CSV for the axis-letter features.
 const AXIS_IDX = { x: 0, y: 1, z: 2 };
 /** Map an axis letter ('x' | 'y' | 'z') to its numeric code. */
 function encodeAxis(v) { return AXIS_IDX[v]; }
 /**
  * Quote a value as a CSV field: wrap it in double quotes and double any
  * embedded quote (RFC 4180). JSON-style backslash escapes are not understood
  * by standard CSV readers, so names containing `"` would corrupt the row.
  */
 function csvQuote(text) {
  return `"${String(text).replace(/"/g, '""')}"`;
 }
 /**
  * Serialize one labeled pair as a CSV line (no trailing newline).
  *
  * @param {string} modelName - Source model name; written as a quoted field.
  * @param {number} label - 1 for a positive pair, 0 for a negative pair.
  * @param {object} feat - Feature record keyed by the names in FEATURE_KEYS.
  * @returns {string} `model,label,<features in FEATURE_KEYS order>`; integers
  *   are written as-is, other numbers with 4 decimal places.
  */
 function rowToCsv(modelName, label, feat) {
  const cells = [csvQuote(modelName), label];
  for (const k of FEATURE_KEYS) {
    let v = feat[k];
    if (k === 'a_axis' || k === 'b_axis') v = encodeAxis(v);
    cells.push(typeof v === 'number' ? (Number.isInteger(v) ? v : v.toFixed(4)) : v);
  }
  return cells.join(',');
 }
 /**
  * Build the positive and negative training pairs for one model.
  *
  * Positives are all index pairs of faces with the same `shareKey`, shuffled
  * and capped at TARGET_POSITIVES_PER_MODEL. Negatives are sampled with
  * `rand()` from faces on the same texture that are NOT a positive pair.
  * The result depends on the global RNG state, so call order matters.
  *
  * @param {Array<object>} faces - Face records for a single model.
  * @param {string} modelName - Passed through unchanged into the result.
  * @returns {{positives: number[][], negatives: number[][], faces: Array<object>, modelName: string}}
  *   Pairs are `[i, j]` indices into `faces`.
  */
 function buildPairsForModel(faces, modelName) {
  // Group faces by share key (only considers within-texture sharing)
  const shareMap = new Map();
  for (let i = 0; i < faces.length; i++) {
    if (!shareMap.has(faces[i].shareKey)) shareMap.set(faces[i].shareKey, []);
    shareMap.get(faces[i].shareKey).push(i);
  }
  // Positive pairs: every (i, j) within a share group of size >= 2
  const positives = [];
  for (const idxs of shareMap.values()) {
    if (idxs.length < 2) continue;
    for (let i = 0; i < idxs.length; i++) {
      for (let j = i + 1; j < idxs.length; j++) {
        positives.push([idxs[i], idxs[j]]);
      }
    }
  }
  // Shuffle before capping so the kept positives are a random subset.
  shuffle(positives);
  const positivesCapped = positives.slice(0, TARGET_POSITIVES_PER_MODEL);
  // Hard negatives: same texture, biased toward same-parent or close-dim faces.
  // Build candidate pool keyed by texture for fast lookup.
  const facesByTex = new Map();
  for (let i = 0; i < faces.length; i++) {
    const t = faces[i].texKey;
    if (!facesByTex.has(t)) facesByTex.set(t, []);
    facesByTex.get(t).push(i);
  }
  // Built from ALL positives (not just the capped subset), so a dropped
  // positive can never be re-sampled as a negative.
  const positiveSet = new Set(positives.map(([a, b]) => a < b ? `${a},${b}` : `${b},${a}`));
  // The Math.max floor means at least one negative per kept positive is targeted,
  // so a NEG_RATIO below 1 has no effect here.
  const targetNegatives = Math.max(positivesCapped.length, Math.round(positivesCapped.length * NEG_RATIO));
  const negatives = [];
  let attempts = 0;
  // Rejection sampling with a hard attempt budget, so the loop always terminates
  // even when too few valid negatives exist.
  const maxAttempts = targetNegatives * 30;
  while (negatives.length < targetNegatives && attempts < maxAttempts) {
    attempts++;
    // Pick a face uniformly, then pick a partner from the same texture pool,
    // preferring same-parent.
    const a = Math.floor(rand() * faces.length);
    const fa = faces[a];
    const pool = facesByTex.get(fa.texKey);
    if (!pool || pool.length < 2) continue;
    // 60% chance: same-parent partner if possible; else any.
    let b;
    if (rand() < 0.6) {
      const sameParent = pool.filter((idx) => idx !== a && faces[idx].parentId === fa.parentId);
      if (sameParent.length) {
        b = sameParent[Math.floor(rand() * sameParent.length)];
      } else {
        b = pool[Math.floor(rand() * pool.length)];
      }
    } else {
      b = pool[Math.floor(rand() * pool.length)];
    }
    // The unrestricted pool includes `a` itself; a self-pair costs an attempt and is retried.
    if (b === a) continue;
    const key = a < b ? `${a},${b}` : `${b},${a}`;
    if (positiveSet.has(key)) continue;
    // Optional: filter out blatant negatives (very different sizes) to avoid trivial training
    const fb = faces[b];
    const minA = Math.min(fa.uvW, fa.uvH), maxA = Math.max(fa.uvW, fa.uvH);
    const minB = Math.min(fb.uvW, fb.uvH), maxB = Math.max(fb.uvW, fb.uvH);
    const sizeRatio = Math.min(minA / maxB, minB / maxA);
    if (sizeRatio < 0.1) {
      // Allow some, but not too many
      if (rand() > 0.2) continue;
    }
    // NOTE(review): accepted pairs are not checked against `negatives`, so the
    // same negative pair can be emitted more than once.
    negatives.push([a, b]);
  }
  return { positives: positivesCapped, negatives, faces, modelName };
 }
 /**
  * Entry point: extract faces from every .bbmodel in ROOT, build labeled
  * pairs per model and stream them to OUT as CSV, then print a summary.
  *
  * Models that fail to parse, have fewer than two usable faces, or yield no
  * positive pairs are skipped and counted.
  */
 function main() {
  // Sorted so the seeded RNG sees models in the same order on every platform;
  // readdirSync order is otherwise OS-dependent and would change pairs.csv.
  const files = fs.readdirSync(ROOT).filter((f) => f.endsWith('.bbmodel')).sort();
  const writer = fs.createWriteStream(OUT);
  writer.write(['model', 'label', ...FEATURE_KEYS].join(',') + '\n');
  let totalPos = 0, totalNeg = 0, modelsUsed = 0, skipped = 0;
  for (const f of files) {
    const fp = path.join(ROOT, f);
    let faces;
    try {
      faces = extractFacesFromModel(fp);
    } catch (e) {
      // Best-effort: keep going, but say which file failed and why.
      console.warn(`Skipping ${f}: ${e?.message ?? e}`);
      skipped++;
      continue;
    }
    if (!faces || faces.length < 2) {
      skipped++;
      continue;
    }
    const modelName = path.basename(f, '.bbmodel');
    const { positives, negatives } = buildPairsForModel(faces, modelName);
    if (positives.length === 0) {
      skipped++;
      continue;
    }
    for (const [i, j] of positives) {
      writer.write(rowToCsv(modelName, 1, pairFeatures(faces[i], faces[j])) + '\n');
    }
    for (const [i, j] of negatives) {
      writer.write(rowToCsv(modelName, 0, pairFeatures(faces[i], faces[j])) + '\n');
    }
    totalPos += positives.length;
    totalNeg += negatives.length;
    modelsUsed++;
  }
  const totalPairs = totalPos + totalNeg;
  // Avoid printing "NaN%" when no model produced any pair.
  const positivePct = totalPairs > 0 ? (totalPos / totalPairs * 100).toFixed(1) : '0.0';
  // Report only once the stream has flushed, so "Wrote" is true when printed.
  writer.on('finish', () => {
    console.log(`Wrote ${OUT}`);
    console.log(`Models: ${modelsUsed} used, ${skipped} skipped`);
    console.log(`Pairs: ${totalPos} positive, ${totalNeg} negative (${positivePct}% positive)`);
    console.log(`Features: ${FEATURE_KEYS.length}`);
  });
  writer.end();
 }
 main();
--- a/ml/inference_test_cases.json
+++ b/ml/inference_test_cases.json
--- a/ml/share_model.json
+++ b/ml/share_model.json
--- a/ml/train_share_classifier.py
+++ b/ml/train_share_classifier.py
@ -0,0 +1,209 @@
 """Phase 2: train the share-prediction classifier.
 Setup (one-time):
    pip install lightgbm pandas scikit-learn
 Run (from this directory):
    python train_share_classifier.py
 Inputs:
    pairs.csv  (next to this script — generated by extract_pairs.mjs)
 Outputs:
    share_model.json          LightGBM tree dump for the in-plugin JS evaluator
    eval_report.txt           precision/recall sweep + feature importance
    inference_test_cases.json self-test cases the plugin verifies on load
 Held-out split is by MODEL, not by pair, so the eval numbers reflect
 generalization to unseen models — not just unseen pairs from already-seen models.
 """
 from __future__ import annotations
 import json
 import os
 from pathlib import Path
 import lightgbm as lgb
 import numpy as np
 import pandas as pd
 from sklearn.metrics import (
    average_precision_score,
    precision_recall_curve,
    roc_auc_score,
 )
 from sklearn.model_selection import GroupShuffleSplit
 # Directory containing this script; every input and output path below is relative to it.
 ROOT = Path(__file__).resolve().parent
 # Training pairs CSV read by main().
 PAIRS = ROOT / "pairs.csv"
 # Compact JSON tree export written by main().
 MODEL_OUT = ROOT / "share_model.json"
 # Plain-text evaluation report written by main().
 REPORT_OUT = ROOT / "eval_report.txt"
 # Columns cast to pandas "category" dtype and declared to LightGBM as categorical features.
 CATEGORICAL = ["a_dir", "b_dir", "a_axis", "b_axis"]
 # Name of the target column in the pairs CSV.
 LABEL = "label"
 # Name of the column used as the group key for the train/test split.
 GROUP = "model"
 # Operating thresholds we want precision/recall reported at.
 THRESHOLDS = [0.50, 0.70, 0.80, 0.85, 0.90, 0.95]
 def main() -> None:
     """Train the share classifier on PAIRS and write the model, report and self-test files.
     Steps, in order:
       1. Load the pairs CSV; every column except LABEL and GROUP is a feature.
       2. Split 80/20 by GROUP so no model contributes rows to both sides.
       3. Train a LightGBM binary classifier with early stopping on the held-out set.
       4. Write REPORT_OUT: AUC, average precision, a precision/recall sweep over
          THRESHOLDS and the top 20 features by gain.
       5. Write MODEL_OUT: the compacted tree dump for the JS evaluator.
       6. Write inference_test_cases.json: up to 50 held-out rows with the
          probability LightGBM predicted for each.
     """
     df = pd.read_csv(PAIRS)
     print(f"Loaded {len(df):,} pairs across {df[GROUP].nunique()} models")
     feature_cols = [c for c in df.columns if c not in (LABEL, GROUP)]
     X = df[feature_cols]
     y = df[LABEL].astype(int)
     groups = df[GROUP]
     # Group-aware 80/20 split — same model never in both train and test.
     splitter = GroupShuffleSplit(n_splits=1, test_size=0.20, random_state=42)
     train_idx, test_idx = next(splitter.split(X, y, groups))
     # Take explicit copies: the categorical casts below assign whole columns, and
     # doing that on a slice of X is what triggers pandas' SettingWithCopyWarning.
     X_train, X_test = X.iloc[train_idx].copy(), X.iloc[test_idx].copy()
     y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
     print(
         f"Train: {len(X_train):,} pairs / {df.iloc[train_idx][GROUP].nunique()} models  |"
         f"  Test: {len(X_test):,} pairs / {df.iloc[test_idx][GROUP].nunique()} models"
     )
     # Coerce categoricals
     for c in CATEGORICAL:
         X_train[c] = X_train[c].astype("category")
         X_test[c] = X_test[c].astype("category")
     train_set = lgb.Dataset(X_train, label=y_train, categorical_feature=CATEGORICAL)
     valid_set = lgb.Dataset(X_test, label=y_test, categorical_feature=CATEGORICAL, reference=train_set)
     params = {
         "objective": "binary",
         "metric": ["binary_logloss", "auc"],
         "learning_rate": 0.05,
         "num_leaves": 63,
         "min_data_in_leaf": 50,
         "feature_fraction": 0.9,
         "bagging_fraction": 0.8,
         "bagging_freq": 5,
         "verbose": -1,
     }
     model = lgb.train(
         params,
         train_set,
         num_boost_round=600,
         valid_sets=[train_set, valid_set],
         valid_names=["train", "valid"],
         callbacks=[
             lgb.early_stopping(stopping_rounds=30, verbose=True),
             lgb.log_evaluation(period=50),
         ],
     )
     # ---- Eval ----
     y_pred = model.predict(X_test, num_iteration=model.best_iteration)
     auc = roc_auc_score(y_test, y_pred)
     ap = average_precision_score(y_test, y_pred)
     lines = []
     lines.append(f"Held-out models: {df.iloc[test_idx][GROUP].nunique()}")
     lines.append(f"Held-out pairs:  {len(X_test):,}")
     lines.append(f"AUC:             {auc:.4f}")
     lines.append(f"Average prec:    {ap:.4f}")
     lines.append("")
     lines.append("Threshold sweep (held-out pairs):")
     lines.append(f"{'thresh':>8}  {'prec':>8}  {'recall':>8}  {'f1':>8}  {'kept%':>8}")
     for t in THRESHOLDS:
         # Rows predicted positive at this threshold; "kept" is their share of all held-out rows.
         mask = (y_pred >= t)
         kept = mask.mean()
         if mask.sum() == 0:
             lines.append(f"{t:>8.2f}  {'-':>8}  {'-':>8}  {'-':>8}  {kept * 100:>7.2f}%")
             continue
         tp = (mask & (y_test == 1)).sum()
         fp = (mask & (y_test == 0)).sum()
         fn = (~mask & (y_test == 1)).sum()
         # max(..., 1) / max(..., 1e-9) keep the ratios defined when a denominator is zero.
         prec = tp / max(tp + fp, 1)
         rec = tp / max(tp + fn, 1)
         f1 = 2 * prec * rec / max(prec + rec, 1e-9)
         lines.append(f"{t:>8.2f}  {prec:>8.4f}  {rec:>8.4f}  {f1:>8.4f}  {kept * 100:>7.2f}%")
     lines.append("")
     lines.append("Top features by gain:")
     gain = model.feature_importance(importance_type="gain")
     names = model.feature_name()
     order = np.argsort(gain)[::-1]
     for i in order[:20]:
         lines.append(f"  {names[i]:>22}  {gain[i]:>14.0f}")
     report = "\n".join(lines)
     REPORT_OUT.write_text(report)
     print()
     print(report)
     print()
     print(f"Wrote {REPORT_OUT}")
     # ---- Export trees in a JS-evaluable form ----
     dump = model.dump_model(num_iteration=model.best_iteration)
     # Preserve feature_names and categorical info for JS eval; trim heavy fields.
     export = {
         "version": 1,
         "objective": "binary",
         "feature_names": dump["feature_names"],
         "categorical_features": [
             dump["feature_names"][i] for i in dump.get("pandas_categorical_index", [])
             if i < len(dump["feature_names"])
         ] if "pandas_categorical_index" in dump else CATEGORICAL,
         "best_iteration": int(model.best_iteration),
         "trees": [_compact_tree(t) for t in dump["tree_info"]],
     }
     MODEL_OUT.write_text(json.dumps(export, separators=(",", ":")))
     print(f"Wrote {MODEL_OUT}  ({MODEL_OUT.stat().st_size / 1024:.1f} KB, "
           f"{len(export['trees'])} trees)")
     # ---- Self-test cases for the JS evaluator ----
     # Pick 50 random held-out rows, save (feature dict, expected prediction)
     rng = np.random.default_rng(0)
     sample_idx = rng.choice(len(X_test), size=min(50, len(X_test)), replace=False)
     test_cases = []
     for i in sample_idx:
         row = X_test.iloc[i]
         features = {}
         for col in feature_cols:
             v = row[col]
             if col in CATEGORICAL:
                 # NOTE(review): this stores the raw column value. LightGBM splits pandas
                 # category columns on category *codes*, so this presumably relies on the
                 # raw values already being 0-based contiguous ints — confirm against
                 # extract_pairs.mjs.
                 v = int(v)
             else:
                 v = float(v)
             features[col] = v
         test_cases.append({
             "features": features,
             "expected_prob": float(y_pred[i]),
         })
     tests_out = ROOT / "inference_test_cases.json"
     tests_out.write_text(json.dumps(test_cases, separators=(",", ":")))
     print(f"Wrote {tests_out}  ({len(test_cases)} cases)")
 def _compact_tree(tree: dict) -> dict:
     """Reduce one dumped tree entry to its shrinkage and its compacted root node.
     ``shrinkage`` falls back to 1.0 when the entry does not carry one; the node
     structure under ``tree_structure`` is compacted by ``_compact_node``.
     """
     return {
         "shrinkage": tree.get("shrinkage", 1.0),
         "root": _compact_node(tree["tree_structure"]),
     }
 def _compact_node(node: dict) -> dict:
    if "leaf_value" in node:
        return {"v": node["leaf_value"]}
    out = {
        "f": node["split_feature"],
        "t": node["threshold"],
        "d": node["decision_type"],            # "<=" or "==" (categorical)
        "default_left": node.get("default_left", True),
        "missing_type": node.get("missing_type", "None"),
        "l": _compact_node(node["left_child"]),
        "r": _compact_node(node["right_child"]),
    }
    return out
 # Run the training pipeline only when this file is executed as a script, not on import.
 if __name__ == "__main__":
     main()
--- a/ml/validate_js_eval.mjs
+++ b/ml/validate_js_eval.mjs
@ -0,0 +1,55 @@
 // Validate the JS LightGBM evaluator against LightGBM's own predictions.
 // Run: node tools/blockbench-uv-packer/ml/validate_js_eval.mjs
 import fs from 'node:fs';
 import path from 'node:path';
 // Resolve the data files relative to this module through URL resolution.
 // Stripping the leading "/" from the URL pathname only produces a usable path
 // for Windows drive URLs (file:///C:/...); on POSIX it turns the absolute path
 // into a relative one, and percent-encoded characters (e.g. spaces) are never
 // decoded. fs.readFileSync accepts file: URL objects directly, which is portable.
 const ROOT = new URL('.', import.meta.url);
 const model = JSON.parse(fs.readFileSync(new URL('share_model.json', ROOT), 'utf8'));
 const tests = JSON.parse(fs.readFileSync(new URL('inference_test_cases.json', ROOT), 'utf8'));
 /**
  * Walks one exported decision tree and returns the value of the leaf reached.
  *
  * Missing inputs (undefined, null or NaN) follow LightGBM's own decision rules
  * rather than always taking the node's default direction:
  *  - categorical split ("=="): a missing or negative value goes right;
  *    otherwise the value is truncated to an integer and goes left only when
  *    it is one of the "||"-separated categories in the threshold;
  *  - numeric split: a missing value is treated as 0 unless the node's
  *    missing_type is "NaN"; the default direction is used only for a missing
  *    value under "NaN" or a (near-)zero value under "Zero"; everything else
  *    is compared with `<=` against the threshold.
  *
  * @param {object} node - Root node: a leaf `{ v }` or a split with
  *   `f`, `t`, `d`, `default_left`, `missing_type`, `l`, `r`.
  * @param {Array} x - Feature values indexed by feature position.
  * @returns {number} The `v` of the leaf reached.
  */
 function evalTree(node, x) {
   // Magnitude below which LightGBM considers a value to be zero.
   const ZERO_THRESHOLD = 1e-35;
   while (!('v' in node)) {
     const raw = x[node.f];
     const isMissing = raw === undefined || raw === null
       || (typeof raw === 'number' && Number.isNaN(raw));
     let goLeft;
     if (node.d === '==') {
       if (isMissing) {
         goLeft = false;
       } else {
         const category = Math.trunc(raw);
         const cats = String(node.t).split('||').map(Number);
         goLeft = category >= 0 && cats.includes(category);
       }
     } else {
       // Exports without the field behave like LightGBM's "None".
       const missingType = node.missing_type ?? 'None';
       const val = isMissing && missingType !== 'NaN' ? 0 : raw;
       const useDefault = (missingType === 'NaN' && isMissing)
         || (missingType === 'Zero' && Math.abs(val) <= ZERO_THRESHOLD);
       goLeft = useDefault ? Boolean(node.default_left) : val <= node.t;
     }
     node = goLeft ? node.l : node.r;
   }
   return node.v;
 }
 /**
  * Scores one feature dictionary with the loaded model.
  *
  * Feature values are laid out in `model.feature_names` order, every tree's
  * output is summed into a raw margin, and the margin is passed through the
  * logistic function.
  *
  * @param {object} features - Feature values keyed by feature name.
  * @returns {number} The logistic of the summed tree outputs.
  */
 function predict(features) {
   const vector = model.feature_names.map((name) => features[name]);
   const margin = model.trees.reduce(
     (sum, tree) => sum + evalTree(tree.root, vector),
     0,
   );
   return 1 / (1 + Math.exp(-margin));
 }
 // Compare the JS evaluator with the recorded LightGBM probabilities, tracking
 // the largest absolute error and the case that produced it. The script exits
 // with status 1 when that error exceeds 0.005.
 let maxErr = 0;
 let worstCase = null;
 for (const tc of tests) {
   const got = predict(tc.features);
   const diff = Math.abs(got - tc.expected_prob);
   // NaN compares false against everything, so a NaN prediction (or a missing
   // expected value) would otherwise slip through and the run would print PASS.
   const err = Number.isNaN(diff) ? Infinity : diff;
   if (err > maxErr) { maxErr = err; worstCase = { tc, got }; }
 }
 console.log(`tests: ${tests.length}`);
 console.log(`max abs error: ${maxErr.toExponential(4)}`);
 if (worstCase) {
   // Number(...) keeps the report printable even when a value is not a number.
   console.log(`worst: expected=${Number(worstCase.tc.expected_prob).toFixed(6)} got=${Number(worstCase.got).toFixed(6)}`);
 }
 if (maxErr > 0.005) {
   console.error('FAIL — JS evaluator disagrees with LightGBM');
   process.exit(1);
 }
 console.log('PASS');