added blockbench uv packer weights, js plugin and ml and training guidelines
commit
2094497543
@ -0,0 +1,15 @@
|
||||
# Generated training data — regenerate from your bbmodel corpus with extract_pairs.mjs
|
||||
ml/pairs.csv
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
.venv/
|
||||
venv/
|
||||
|
||||
# Node
|
||||
node_modules/
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
@ -0,0 +1,300 @@
|
||||
// One-off analyzer: scan every .bbmodel in a directory and report UV-layout
|
||||
// statistics that characterize the project's sorting/packing conventions.
|
||||
//
|
||||
// Usage:
|
||||
// node analyze_uvs.mjs <path/to/bbmodel/directory>
|
||||
// MODELS_DIR=path/to/dir node analyze_uvs.mjs
|
||||
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
|
||||
// Corpus directory: the CLI argument wins, then the MODELS_DIR environment
// variable, then ./bbmodel relative to the current working directory.
const requestedModelsDir = process.argv[2] || process.env.MODELS_DIR || 'bbmodel';
const MODELS_DIR = path.resolve(requestedModelsDir);

// Bail out early with a usage hint when the directory does not exist.
if (fs.existsSync(MODELS_DIR) === false) {
  console.error(`Models dir not found: ${MODELS_DIR}`);
  console.error('Pass it as an argument or set MODELS_DIR.');
  process.exit(1);
}
|
||||
|
||||
const FACE_NAMES = ['north', 'east', 'south', 'west', 'up', 'down'];
|
||||
|
||||
/**
 * Normalize a face UV quad into an axis-aligned rectangle.
 *
 * @param {number[]} uv - `[x1, y1, x2, y2]` corner coordinates.
 * @returns {{x: number, y: number, w: number, h: number, flipX: boolean, flipY: boolean}}
 *   Top-left corner, non-negative size, and whether the second corner lies
 *   before the first on each axis.
 */
function rectOf(uv) {
  const [startX, startY, endX, endY] = uv;
  const rect = {};
  rect.x = Math.min(startX, endX);
  rect.y = Math.min(startY, endY);
  rect.w = Math.abs(endX - startX);
  rect.h = Math.abs(endY - startY);
  rect.flipX = endX < startX;
  rect.flipY = endY < startY;
  return rect;
}
|
||||
|
||||
/**
 * Area of a rectangle.
 *
 * @param {{w: number, h: number}} r - rectangle with width and height.
 * @returns {number} `w * h`.
 */
function rectArea(r) {
  const { w: width, h: height } = r;
  return width * height;
}
|
||||
|
||||
/**
 * Smallest axis-aligned box that contains every rectangle in `rects`.
 *
 * @param {Array<{x: number, y: number, w: number, h: number}>} rects
 * @returns {{x: number, y: number, w: number, h: number}} The bounding box;
 *   an all-zero box when `rects` is empty.
 */
function bboxOf(rects) {
  if (!rects.length) {
    return { x: 0, y: 0, w: 0, h: 0 };
  }

  let left = Infinity;
  let top = Infinity;
  let right = -Infinity;
  let bottom = -Infinity;
  for (const { x, y, w, h } of rects) {
    if (x < left) left = x;
    if (y < top) top = y;
    if (x + w > right) right = x + w;
    if (y + h > bottom) bottom = y + h;
  }

  return { x: left, y: top, w: right - left, h: bottom - top };
}
|
||||
|
||||
/**
 * Walk the outliner tree and record, for every cube uuid, the name of the
 * group that directly contains it.
 *
 * @param {Array<string|object>} outlinerNodes - string entries are cube uuids;
 *   objects with a `children` array are groups. Anything else is ignored.
 * @param {?string} parentName - name of the enclosing group; falsy at the root.
 * @param {Map<string, string>} out - filled in place: uuid -> group name
 *   (`'__root__'` for cubes outside any named parent).
 * @param {number} [depth=0] - nesting level, used to label unnamed groups.
 */
function buildCubeGroupMap(outlinerNodes, parentName, out, depth = 0) {
  for (const entry of outlinerNodes) {
    if (typeof entry === 'string') {
      // Leaf entry: a cube uuid.
      out.set(entry, parentName || '__root__');
      continue;
    }
    const isGroup = entry && Array.isArray(entry.children);
    if (!isGroup) continue;

    const label = entry.name || `group@${depth}`;
    buildCubeGroupMap(entry.children, label, out, depth + 1);
  }
}
|
||||
|
||||
/**
 * Whether two rectangles overlap with positive area.
 * Rectangles that merely touch along an edge do not count as overlapping.
 *
 * @param {{x: number, y: number, w: number, h: number}} a
 * @param {{x: number, y: number, w: number, h: number}} b
 * @returns {boolean}
 */
function rectsOverlap(a, b) {
  const separatedOnX = a.x + a.w <= b.x || b.x + b.w <= a.x;
  const separatedOnY = a.y + a.h <= b.y || b.y + b.h <= a.y;
  return !(separatedOnX || separatedOnY);
}
|
||||
|
||||
/**
 * Whether two rectangles have exactly the same position and size.
 *
 * @param {{x: number, y: number, w: number, h: number}} a
 * @param {{x: number, y: number, w: number, h: number}} b
 * @returns {boolean}
 */
function rectsIdentical(a, b) {
  return ['x', 'y', 'w', 'h'].every((key) => a[key] === b[key]);
}
|
||||
|
||||
/**
 * Compute UV-layout statistics for one per-face-UV .bbmodel file.
 *
 * @param {string} filePath - path to the .bbmodel JSON file.
 * @returns {object} `{ skipped: 'box_uv' }` for box-UV models,
 *   `{ skipped: 'no_faces' }` when no cube face has a non-degenerate UV rect,
 *   otherwise the statistics record used by the aggregate report.
 * @throws {Error} when the file cannot be read or is not valid JSON.
 */
function analyzeModel(filePath) {
  const data = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  if (data.meta?.box_uv === true) return { skipped: 'box_uv' };

  const uvW = data.resolution?.width ?? 16;
  const uvH = data.resolution?.height ?? 16;
  const elements = data.elements || [];
  const cubeGroup = new Map();
  buildCubeGroupMap(data.outliner || [], null, cubeGroup);

  // Collect every non-degenerate face rect, globally and per group.
  const allRects = [];
  const groupRects = new Map(); // groupName -> [rects]
  let cubeCount = 0;

  for (const el of elements) {
    if (el.type && el.type !== 'cube') continue;
    // Count only real cubes so the `cubes` statistic matches its name.
    cubeCount++;
    if (!el.faces) continue;
    const groupName = cubeGroup.get(el.uuid) || '__root__';
    if (!groupRects.has(groupName)) groupRects.set(groupName, []);
    for (const fname of FACE_NAMES) {
      const face = el.faces[fname];
      if (!face || !face.uv) continue;
      const r = rectOf(face.uv);
      if (r.w === 0 || r.h === 0) continue;
      allRects.push({ ...r, cube: el.name, group: groupName, face: fname });
      groupRects.get(groupName).push({ ...r, cube: el.name, face: fname });
    }
  }

  if (!allRects.length) return { skipped: 'no_faces' };

  // Global bbox and how densely it is filled.
  const globalBbox = bboxOf(allRects);
  const usedArea = allRects.reduce((sum, r) => sum + rectArea(r), 0);
  const bboxArea = globalBbox.w * globalBbox.h;
  const canvasArea = uvW * uvH;

  // Shared rects: faces whose position and size coincide with another face.
  const uniqRects = new Set(allRects.map((r) => `${r.x},${r.y},${r.w},${r.h}`));
  const sharedRectFraction = 1 - uniqRects.size / allRects.length;

  // Flip usage.
  const flippedFraction =
    allRects.filter((r) => r.flipX || r.flipY).length / allRects.length;

  // Per-group clustering: how tight is each group's bbox versus its total
  // face area? The bbox is computed once and reused for the overlap check.
  const groupStats = [];
  const gBboxes = [];
  for (const [name, rects] of groupRects.entries()) {
    if (!rects.length) continue;
    const bb = bboxOf(rects);
    const totalArea = rects.reduce((sum, r) => sum + rectArea(r), 0);
    const bbA = bb.w * bb.h;
    groupStats.push({
      name,
      faces: rects.length,
      totalArea,
      bboxW: bb.w,
      bboxH: bb.h,
      bboxArea: bbA,
      density: bbA > 0 ? totalArea / bbA : 0,
    });
    gBboxes.push({ name, ...bb });
  }

  // Group separation: fraction of group pairs whose bboxes overlap.
  let groupBboxOverlapPairs = 0;
  for (let i = 0; i < gBboxes.length; i++) {
    for (let j = i + 1; j < gBboxes.length; j++) {
      if (rectsOverlap(gBboxes[i], gBboxes[j])) groupBboxOverlapPairs++;
    }
  }
  const totalGroupPairs = (gBboxes.length * (gBboxes.length - 1)) / 2;
  const groupOverlapRatio = totalGroupPairs ? groupBboxOverlapPairs / totalGroupPairs : 0;

  // Biggest-first ordering: within each group, in file order, how often is a
  // face no larger than the one before it?
  let monotoneDecreaseHits = 0;
  let monotoneDecreaseTotal = 0;
  for (const rects of groupRects.values()) {
    for (let i = 1; i < rects.length; i++) {
      monotoneDecreaseTotal++;
      if (rectArea(rects[i]) <= rectArea(rects[i - 1])) monotoneDecreaseHits++;
    }
  }
  const biggestFirstScore = monotoneDecreaseTotal
    ? monotoneDecreaseHits / monotoneDecreaseTotal
    : 0;

  // Padding: for pairs of rects that share a vertical span, measure the
  // horizontal gap from a's right edge to b's left edge. Only gaps of 0..4
  // units are sampled. Rects are sorted by y (then x) so the inner scan can
  // stop once b starts more than 4 units below a's bottom edge.
  const sortedByYX = [...allRects].sort((a, b) => a.y - b.y || a.x - b.x);
  let zeroGapCount = 0;
  let totalGapsMeasured = 0;
  for (let i = 0; i < sortedByYX.length; i++) {
    const a = sortedByYX[i];
    for (let j = i + 1; j < sortedByYX.length; j++) {
      const b = sortedByYX[j];
      if (b.y > a.y + a.h + 4) break;
      const yOverlap = Math.min(a.y + a.h, b.y + b.h) - Math.max(a.y, b.y);
      if (yOverlap <= 0) continue;
      const gap = b.x - (a.x + a.w);
      if (gap >= 0 && gap <= 4) {
        totalGapsMeasured++;
        if (gap === 0) zeroGapCount++;
      }
    }
  }
  const zeroPaddingFraction = totalGapsMeasured ? zeroGapCount / totalGapsMeasured : 0;

  return {
    name: path.basename(filePath, '.bbmodel'),
    canvas: { uvW, uvH },
    cubes: cubeCount,
    groups: gBboxes.length,
    faces: allRects.length,
    uniqueRects: uniqRects.size,
    sharedRectFraction: round(sharedRectFraction, 3),
    flippedFraction: round(flippedFraction, 3),
    globalBbox,
    bboxFillOfCanvas: round(bboxArea / canvasArea, 3),
    densityInBbox: round(usedArea / Math.max(bboxArea, 1), 3),
    groupStats: groupStats.map((g) => ({ ...g, density: round(g.density, 3) })),
    groupOverlapRatio: round(groupOverlapRatio, 3),
    biggestFirstScore: round(biggestFirstScore, 3),
    zeroPaddingFraction: round(zeroPaddingFraction, 3),
    paddedSamples: totalGapsMeasured,
  };
}
|
||||
|
||||
/**
 * Round a number to a fixed count of decimal places.
 *
 * @param {number} n - value to round.
 * @param {number} d - number of decimal places to keep.
 * @returns {number}
 */
function round(n, d) {
  const scale = 10 ** d;
  return Math.round(n * scale) / scale;
}
|
||||
|
||||
// Every .bbmodel directly inside MODELS_DIR (no recursion into subfolders).
const files = fs
  .readdirSync(MODELS_DIR)
  .filter((f) => f.endsWith('.bbmodel'))
  .map((f) => path.join(MODELS_DIR, f));

const results = [];
const skipped = { box_uv: 0, no_faces: 0, error: 0 };
for (const f of files) {
  try {
    const r = analyzeModel(f);
    if (r.skipped) {
      skipped[r.skipped]++;
      continue;
    }
    results.push(r);
  } catch (e) {
    // Best-effort scan: keep going, but say which file failed and why so a
    // corrupt model is not silently folded into the error count.
    skipped.error++;
    console.error(`Failed to analyze ${f}: ${e?.message ?? e}`);
  }
}
|
||||
|
||||
// Aggregate
|
||||
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} xs
 * @returns {number} The mean, or 0 for an empty list.
 */
function avg(xs) {
  if (!xs.length) return 0;
  let total = 0;
  for (const value of xs) {
    total = total + value;
  }
  return total / xs.length;
}
|
||||
/**
 * Median of a list of numbers.
 *
 * For an even number of values this is the mean of the two middle values,
 * not the upper one of the two.
 *
 * @param {number[]} xs - values in any order; the input is not modified.
 * @returns {number} The median, or 0 for an empty list.
 */
function median(xs) {
  if (!xs.length) return 0;
  const s = [...xs].sort((a, b) => a - b);
  const mid = Math.floor(s.length / 2);
  return s.length % 2 === 1 ? s[mid] : (s[mid - 1] + s[mid]) / 2;
}
|
||||
/**
 * Percentile by lower-rank selection (no interpolation).
 *
 * @param {number[]} xs - values in any order; the input is not modified.
 * @param {number} p - fraction in [0, 1], e.g. 0.9 for the 90th percentile.
 * @returns {number} The element at index `floor((n - 1) * p)` of the sorted
 *   values, or 0 for an empty list.
 */
function pctile(xs, p) {
  if (!xs.length) {
    return 0;
  }
  const ascending = [...xs];
  ascending.sort((lhs, rhs) => lhs - rhs);
  const rank = Math.floor((ascending.length - 1) * p);
  return ascending[rank];
}
|
||||
|
||||
// Corpus-wide report: averages, medians and percentile spreads of the
// per-model statistics collected in `results`.
const columnOf = (key) => results.map((r) => r[key]);
const roundedMedian = (key) => round(median(columnOf(key)), 3);
const roundedPctile = (key, p) => round(pctile(columnOf(key), p), 3);

const summary = {
  totalAnalyzed: results.length,
  skipped,
  avgCubes: round(avg(columnOf('cubes')), 1),
  avgGroups: round(avg(columnOf('groups')), 1),
  avgFaces: round(avg(columnOf('faces')), 1),
  medianBboxFillOfCanvas: roundedMedian('bboxFillOfCanvas'),
  medianDensityInBbox: roundedMedian('densityInBbox'),
  medianSharedRectFraction: roundedMedian('sharedRectFraction'),
  medianFlippedFraction: roundedMedian('flippedFraction'),
  medianGroupOverlapRatio: roundedMedian('groupOverlapRatio'),
  medianBiggestFirstScore: roundedMedian('biggestFirstScore'),
  medianZeroPadFraction: roundedMedian('zeroPaddingFraction'),
  // Distribution spreads (10th / 50th / 90th percentile).
  densityP10: roundedPctile('densityInBbox', 0.1),
  densityP50: roundedPctile('densityInBbox', 0.5),
  densityP90: roundedPctile('densityInBbox', 0.9),
  groupOverlapP10: roundedPctile('groupOverlapRatio', 0.1),
  groupOverlapP50: roundedPctile('groupOverlapRatio', 0.5),
  groupOverlapP90: roundedPctile('groupOverlapRatio', 0.9),
  biggestFirstP10: roundedPctile('biggestFirstScore', 0.1),
  biggestFirstP50: roundedPctile('biggestFirstScore', 0.5),
  biggestFirstP90: roundedPctile('biggestFirstScore', 0.9),
};
|
||||
|
||||
console.log('=== AGGREGATE ===');
|
||||
console.log(JSON.stringify(summary, null, 2));
|
||||
|
||||
// Show up to 5 sample model breakdowns spread across the complexity range
// (10th, 30th, 50th, 70th and 90th percentile by face count). On small corpora
// several quantiles land on the same model, so duplicates are dropped.
const samples = [...results].sort((a, b) => a.faces - b.faces);
const SAMPLE_QUANTILES = [0.1, 0.3, 0.5, 0.7, 0.9];
const picks = [
  ...new Set(SAMPLE_QUANTILES.map((q) => samples[Math.floor(samples.length * q)])),
].filter(Boolean);

console.log('\n=== SAMPLE BREAKDOWNS (low → high complexity) ===');
for (const r of picks) {
  console.log(`\n--- ${r.name} ---`);
  console.log(`  canvas: ${r.canvas.uvW}x${r.canvas.uvH}, cubes: ${r.cubes}, groups: ${r.groups}, faces: ${r.faces}`);
  console.log(`  global UV bbox: ${r.globalBbox.w}x${r.globalBbox.h} @ (${r.globalBbox.x},${r.globalBbox.y})  fill=${r.bboxFillOfCanvas} density=${r.densityInBbox}`);
  console.log(`  shared-rect fraction: ${r.sharedRectFraction}, flipped: ${r.flippedFraction}, zero-padding gaps: ${r.zeroPaddingFraction} (n=${r.paddedSamples})`);
  console.log(`  group-bbox overlap ratio: ${r.groupOverlapRatio}  biggest-first score: ${r.biggestFirstScore}`);
  // Per-group detail is only printed for models with a handful of groups.
  if (r.groupStats.length <= 8) {
    for (const g of r.groupStats) {
      console.log(`    group "${g.name}": ${g.faces} faces, area=${g.totalArea}, bbox=${g.bboxW}x${g.bboxH}, density=${g.density}`);
    }
  }
}
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,35 @@
|
||||
Held-out models: 123
|
||||
Held-out pairs: 41,713
|
||||
AUC: 0.9847
|
||||
Average prec: 0.9809
|
||||
|
||||
Threshold sweep (held-out pairs):
|
||||
thresh prec recall f1 kept%
|
||||
0.50 0.9250 0.9755 0.9496 52.77%
|
||||
0.70 0.9423 0.9490 0.9456 50.40%
|
||||
0.80 0.9550 0.9130 0.9335 47.84%
|
||||
0.85 0.9614 0.8828 0.9204 45.95%
|
||||
0.90 0.9681 0.8211 0.8886 42.44%
|
||||
0.95 0.9760 0.6656 0.7915 34.13%
|
||||
|
||||
Top features by gain:
|
||||
area_ratio 984480
|
||||
aspect_diff 194900
|
||||
abs_w_diff 84968
|
||||
abs_h_diff 49857
|
||||
cube_dist 37537
|
||||
a_cubeH 28154
|
||||
a_cubeW 26844
|
||||
area_min 24794
|
||||
swap_w_diff 22231
|
||||
cube_w_diff 21646
|
||||
cube_h_diff 20308
|
||||
a_cubeD 20095
|
||||
same_parent 18643
|
||||
b_cubeD 18257
|
||||
cube_d_diff 16869
|
||||
b_cubeH 16518
|
||||
direction_match 13923
|
||||
b_cubeW 13617
|
||||
a_uvH 12096
|
||||
a_uvW 11763
|
||||
@ -0,0 +1,309 @@
|
||||
// Phase 1: extract face-pair training data for the share-prediction classifier.
|
||||
//
|
||||
// Usage:
|
||||
// node extract_pairs.mjs <path/to/bbmodel/directory>
|
||||
// MODELS_DIR=path/to/dir node extract_pairs.mjs
|
||||
// Output: pairs.csv next to this script (~250k rows on a 600-model corpus)
|
||||
//
|
||||
// Label: 1 if the two faces share a normalized UV rect within the same texture
|
||||
// in the source bbmodel, 0 otherwise. Negatives are HARD negatives — sampled
|
||||
// from the same model + biased toward the same parent or similar dimensions —
|
||||
// so the classifier learns "looks shareable but artist chose not to," not the
|
||||
// trivial "obviously different sizes."
|
||||
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
// Directory containing this script; the output CSV is written next to it.
const HERE = path.dirname(fileURLToPath(import.meta.url));

// Corpus directory: the CLI argument wins, then the MODELS_DIR environment
// variable, then ../bbmodel relative to this script.
const requestedRoot = process.argv[2] || process.env.MODELS_DIR || path.join(HERE, '..', 'bbmodel');
const ROOT = path.resolve(requestedRoot);
const OUT = path.join(HERE, 'pairs.csv');

if (fs.existsSync(ROOT) === false) {
  console.error(`Models dir not found: ${ROOT}`);
  console.error('Pass it as an argument or set MODELS_DIR.');
  process.exit(1);
}
|
||||
|
||||
const FACE_NAMES = ['north', 'east', 'south', 'west', 'up', 'down'];
|
||||
const DIR_IDX = Object.fromEntries(FACE_NAMES.map((n, i) => [n, i]));
|
||||
const OPPOSITE = { north: 'south', south: 'north', east: 'west', west: 'east', up: 'down', down: 'up' };
|
||||
const AXIS = { north: 'z', south: 'z', east: 'x', west: 'x', up: 'y', down: 'y' };
|
||||
|
||||
const TARGET_POSITIVES_PER_MODEL = 200;
|
||||
const NEG_RATIO = 1.0; // negatives per positive
|
||||
// Fixed seed so repeated runs over the same corpus emit the same pairs.
const RNG_SEED = 42;

let rngState = RNG_SEED;

/**
 * Deterministic pseudo-random number in [0, 1), produced by a 32-bit linear
 * congruential generator whose state lives in the module-level `rngState`.
 *
 * @returns {number}
 */
function rand() {
  const advanced = (rngState * 1664525 + 1013904223) >>> 0;
  rngState = advanced;
  return advanced / 0x100000000;
}
|
||||
/**
 * Shuffle an array in place (Fisher-Yates), drawing from the seeded `rand()`.
 *
 * @param {Array} arr - array to permute; modified in place.
 * @returns {Array} The same array instance.
 */
function shuffle(arr) {
  let last = arr.length - 1;
  while (last > 0) {
    const pick = Math.floor(rand() * (last + 1));
    const held = arr[last];
    arr[last] = arr[pick];
    arr[pick] = held;
    last -= 1;
  }
  return arr;
}
|
||||
|
||||
/**
 * Walk the outliner tree and record, for every cube uuid, the id of its
 * direct parent group and its nesting depth.
 *
 * @param {Array<string|object>} nodes - string entries are cube uuids; objects
 *   with a `children` array are groups. Anything else is ignored.
 * @param {?string} parentId - id of the enclosing group (`null` at the root).
 * @param {number} parentDepth - nesting depth of the enclosing level.
 * @param {Map<string, {parentId: ?string, depth: number}>} out - filled in place.
 * @param {{n: number}} groupSeq - shared counter used to name groups that
 *   have no `uuid`.
 */
function buildCubeGroupMap(nodes, parentId, parentDepth, out, groupSeq) {
  for (const entry of nodes) {
    if (typeof entry === 'string') {
      out.set(entry, { parentId, depth: parentDepth });
      continue;
    }
    const isGroup = entry && Array.isArray(entry.children);
    if (!isGroup) continue;

    const groupId = entry.uuid || `group_${groupSeq.n++}`;
    buildCubeGroupMap(entry.children, groupId, parentDepth + 1, out, groupSeq);
  }
}
|
||||
|
||||
/**
 * Read a .bbmodel file and flatten it into one record per textured cube face.
 *
 * @param {string} filePath - path to the .bbmodel JSON file.
 * @returns {?Array<object>} One record per face that has a texture and a
 *   non-degenerate UV rect, carrying cube geometry, group membership and the
 *   `shareKey` (texture + normalized rect) used to label shared faces;
 *   `null` for box-UV models.
 * @throws {Error} when the file cannot be read or is not valid JSON.
 */
function extractFacesFromModel(filePath) {
  const model = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  if (model.meta?.box_uv === true) return null;

  const groupByCube = new Map();
  buildCubeGroupMap(model.outliner || [], null, 0, groupByCube, { n: 0 });

  const faces = [];
  for (const el of model.elements || []) {
    // Elements without a `type` are treated as cubes.
    const isCube = !el.type || el.type === 'cube';
    if (!isCube || !el.faces) continue;

    const lo = el.from || [0, 0, 0];
    const hi = el.to || [0, 0, 0];
    const rot = el.rotation || [0, 0, 0];
    const grp = groupByCube.get(el.uuid) || { parentId: '__root__', depth: 0 };

    // Properties shared by every face of this cube.
    const cubeProps = {
      cube: el.uuid,
      cubeName: el.name || '',
      parentId: grp.parentId,
      depth: grp.depth,
      cubeW: Math.abs(hi[0] - lo[0]),
      cubeH: Math.abs(hi[1] - lo[1]),
      cubeD: Math.abs(hi[2] - lo[2]),
      cx: (lo[0] + hi[0]) / 2,
      cy: (lo[1] + hi[1]) / 2,
      cz: (lo[2] + hi[2]) / 2,
      rotX: rot[0],
      rotY: rot[1],
      rotZ: rot[2],
      hasRot: (rot[0] !== 0 || rot[1] !== 0 || rot[2] !== 0) ? 1 : 0,
    };

    for (const dir of FACE_NAMES) {
      const face = el.faces[dir];
      if (!face || !face.uv) continue;

      // Faces with no assigned texture are skipped.
      const tex = face.texture;
      if (tex === null || tex === undefined || tex === false) continue;

      const [x1, y1, x2, y2] = face.uv;
      const w = Math.abs(x2 - x1);
      const h = Math.abs(y2 - y1);
      if (w === 0 || h === 0) continue;

      const left = Math.min(x1, x2);
      const top = Math.min(y1, y2);
      const texKey = String(tex);
      const rectKey = [left, top, w, h].map((v) => v.toFixed(4)).join(',');

      faces.push({
        ...cubeProps,
        dir,
        uvW: w,
        uvH: h,
        flipX: x2 < x1 ? 1 : 0,
        flipY: y2 < y1 ? 1 : 0,
        texKey,
        shareKey: `${texKey}|${rectKey}`,
      });
    }
  }
  return faces;
}
|
||||
|
||||
/**
 * Build the feature record for a pair of face records.
 *
 * @param {object} a - first face record, as produced by extractFacesFromModel.
 * @param {object} b - second face record.
 * @returns {object} Feature name -> value. Direction fields are indices into
 *   FACE_NAMES; `a_axis` / `b_axis` are the axis letters ('x' | 'y' | 'z').
 */
function pairFeatures(a, b) {
  const flag = (cond) => (cond ? 1 : 0);

  const areaA = a.uvW * a.uvH;
  const areaB = b.uvW * b.uvH;
  const smallerArea = Math.min(areaA, areaB);
  const largerArea = Math.max(areaA, areaB);
  const aspectA = a.uvW / a.uvH;
  const aspectB = b.uvW / b.uvH;

  // Offset between the two cube centers.
  const dx = a.cx - b.cx;
  const dy = a.cy - b.cy;
  const dz = a.cz - b.cz;

  const axisA = AXIS[a.dir];
  const axisB = AXIS[b.dir];

  return {
    a_dir: DIR_IDX[a.dir],
    b_dir: DIR_IDX[b.dir],
    a_uvW: a.uvW,
    a_uvH: a.uvH,
    b_uvW: b.uvW,
    b_uvH: b.uvH,
    abs_w_diff: Math.abs(a.uvW - b.uvW),
    abs_h_diff: Math.abs(a.uvH - b.uvH),
    // Swapped comparisons catch shapes that match after a 90-degree rotation.
    swap_w_diff: Math.abs(a.uvW - b.uvH),
    swap_h_diff: Math.abs(a.uvH - b.uvW),
    area_min: smallerArea,
    area_max: largerArea,
    area_ratio: smallerArea / largerArea,
    aspect_diff: Math.abs(aspectA - aspectB),
    a_cubeW: a.cubeW,
    a_cubeH: a.cubeH,
    a_cubeD: a.cubeD,
    b_cubeW: b.cubeW,
    b_cubeH: b.cubeH,
    b_cubeD: b.cubeD,
    cube_dim_match: flag(a.cubeW === b.cubeW && a.cubeH === b.cubeH && a.cubeD === b.cubeD),
    cube_w_diff: Math.abs(a.cubeW - b.cubeW),
    cube_h_diff: Math.abs(a.cubeH - b.cubeH),
    cube_d_diff: Math.abs(a.cubeD - b.cubeD),
    same_cube: flag(a.cube === b.cube),
    same_parent: flag(a.parentId === b.parentId),
    direction_match: flag(a.dir === b.dir),
    direction_opposite: flag(OPPOSITE[a.dir] === b.dir),
    same_axis: flag(axisA === axisB),
    a_axis: axisA, // categorical
    b_axis: axisB,
    flip_match: flag(a.flipX === b.flipX && a.flipY === b.flipY),
    has_rot_either: flag(a.hasRot || b.hasRot),
    rot_match: flag(a.rotX === b.rotX && a.rotY === b.rotY && a.rotZ === b.rotZ),
    cube_dist: Math.sqrt(dx * dx + dy * dy + dz * dz),
    same_texture: flag(a.texKey === b.texKey),
    depth_diff: Math.abs(a.depth - b.depth),
  };
}
|
||||
|
||||
const FEATURE_KEYS = [
|
||||
'a_dir', 'b_dir',
|
||||
'a_uvW', 'a_uvH', 'b_uvW', 'b_uvH',
|
||||
'abs_w_diff', 'abs_h_diff', 'swap_w_diff', 'swap_h_diff',
|
||||
'area_min', 'area_max', 'area_ratio', 'aspect_diff',
|
||||
'a_cubeW', 'a_cubeH', 'a_cubeD', 'b_cubeW', 'b_cubeH', 'b_cubeD',
|
||||
'cube_dim_match', 'cube_w_diff', 'cube_h_diff', 'cube_d_diff',
|
||||
'same_cube', 'same_parent',
|
||||
'direction_match', 'direction_opposite', 'same_axis', 'a_axis', 'b_axis',
|
||||
'flip_match', 'has_rot_either', 'rot_match',
|
||||
'cube_dist', 'same_texture', 'depth_diff',
|
||||
];
|
||||
|
||||
// Integer codes used for the axis letters in the CSV output.
const AXIS_IDX = { x: 0, y: 1, z: 2 };

/**
 * Map an axis letter to its integer code.
 *
 * @param {string} v - 'x', 'y' or 'z'.
 * @returns {number|undefined} The code, or `undefined` for any other key.
 */
function encodeAxis(v) {
  const code = AXIS_IDX[v];
  return code;
}
|
||||
|
||||
/**
 * Serialize one labelled pair as a CSV row (without trailing newline).
 *
 * @param {string} modelName - source model name; always emitted as a quoted
 *   CSV field with embedded double quotes doubled, so commas or quotes in a
 *   file name cannot break the row.
 * @param {number} label - 1 for a sharing pair, 0 otherwise.
 * @param {object} feat - feature record from pairFeatures.
 * @returns {string} Cells in the order: model, label, then FEATURE_KEYS.
 */
function rowToCsv(modelName, label, feat) {
  // JSON-style escaping (\" for quotes) is not valid CSV quoting; CSV readers
  // expect an embedded quote to be written as two quotes.
  const quotedName = `"${String(modelName).replace(/"/g, '""')}"`;
  const cells = [quotedName, label];
  for (const k of FEATURE_KEYS) {
    let v = feat[k];
    // Axis letters are written as their integer codes.
    if (k === 'a_axis' || k === 'b_axis') v = encodeAxis(v);
    // Integers are written as-is; other numbers with 4 decimal places.
    cells.push(typeof v === 'number' ? (Number.isInteger(v) ? v : v.toFixed(4)) : v);
  }
  return cells.join(',');
}
|
||||
|
||||
/**
 * Sample positive (sharing) and hard-negative (non-sharing) face pairs for
 * one model.
 *
 * @param {Array<object>} faces - face records from extractFacesFromModel.
 * @param {string} modelName - passed through unchanged in the result.
 * @returns {{positives: number[][], negatives: number[][], faces: Array<object>, modelName: string}}
 *   Index pairs into `faces`. Positives are capped at
 *   TARGET_POSITIVES_PER_MODEL. Negatives are distinct pairs, so there can be
 *   fewer of them than requested when the model offers few candidates.
 */
function buildPairsForModel(faces, modelName) {
  const pairKey = (i, j) => (i < j ? `${i},${j}` : `${j},${i}`);

  // Group faces by share key (only considers within-texture sharing).
  const shareMap = new Map();
  for (let i = 0; i < faces.length; i++) {
    if (!shareMap.has(faces[i].shareKey)) shareMap.set(faces[i].shareKey, []);
    shareMap.get(faces[i].shareKey).push(i);
  }

  // Positive pairs: every (i, j) within a share group of size >= 2.
  const positives = [];
  for (const idxs of shareMap.values()) {
    if (idxs.length < 2) continue;
    for (let i = 0; i < idxs.length; i++) {
      for (let j = i + 1; j < idxs.length; j++) {
        positives.push([idxs[i], idxs[j]]);
      }
    }
  }
  shuffle(positives);
  const positivesCapped = positives.slice(0, TARGET_POSITIVES_PER_MODEL);

  // Hard negatives: same texture, biased toward same-parent faces.
  // Candidate pool keyed by texture for fast lookup.
  const facesByTex = new Map();
  for (let i = 0; i < faces.length; i++) {
    const t = faces[i].texKey;
    if (!facesByTex.has(t)) facesByTex.set(t, []);
    facesByTex.get(t).push(i);
  }

  // Built from ALL positives, not just the capped sample, so a sharing pair
  // that was dropped by the cap can never be emitted as a negative.
  const positiveSet = new Set(positives.map(([a, b]) => pairKey(a, b)));
  // Accepted negatives, so the same pair is not written to the CSV twice.
  const negativeSet = new Set();

  const targetNegatives = Math.max(positivesCapped.length, Math.round(positivesCapped.length * NEG_RATIO));
  const negatives = [];
  let attempts = 0;
  const maxAttempts = targetNegatives * 30;

  while (negatives.length < targetNegatives && attempts < maxAttempts) {
    attempts++;
    // Pick a face uniformly, then a partner from the same texture pool.
    const a = Math.floor(rand() * faces.length);
    const fa = faces[a];
    const pool = facesByTex.get(fa.texKey);
    if (!pool || pool.length < 2) continue;

    // 60% chance: same-parent partner if one exists; otherwise any partner.
    let b;
    if (rand() < 0.6) {
      const sameParent = pool.filter((idx) => idx !== a && faces[idx].parentId === fa.parentId);
      if (sameParent.length) {
        b = sameParent[Math.floor(rand() * sameParent.length)];
      } else {
        b = pool[Math.floor(rand() * pool.length)];
      }
    } else {
      b = pool[Math.floor(rand() * pool.length)];
    }
    if (b === a) continue;

    const key = pairKey(a, b);
    if (positiveSet.has(key) || negativeSet.has(key)) continue;

    // Thin out blatant negatives (very different sizes) to avoid trivial
    // training examples: keep roughly one in five of them.
    const fb = faces[b];
    const minA = Math.min(fa.uvW, fa.uvH);
    const maxA = Math.max(fa.uvW, fa.uvH);
    const minB = Math.min(fb.uvW, fb.uvH);
    const maxB = Math.max(fb.uvW, fb.uvH);
    const sizeRatio = Math.min(minA / maxB, minB / maxA);
    if (sizeRatio < 0.1 && rand() > 0.2) continue;

    negativeSet.add(key);
    negatives.push([a, b]);
  }

  return { positives: positivesCapped, negatives, faces, modelName };
}
|
||||
|
||||
/**
 * Extract labelled face pairs from every .bbmodel in ROOT and write them to
 * OUT as CSV, then print a summary once the file has been flushed.
 *
 * Models are skipped (and counted) when they fail to parse, are box-UV, have
 * fewer than two usable faces, or contain no sharing pair.
 */
function main() {
  const files = fs.readdirSync(ROOT).filter((f) => f.endsWith('.bbmodel'));
  const writer = fs.createWriteStream(OUT);
  writer.on('error', (err) => {
    console.error(`Failed to write ${OUT}: ${err.message}`);
    process.exitCode = 1;
  });
  writer.write(['model', 'label', ...FEATURE_KEYS].join(',') + '\n');

  let totalPos = 0;
  let totalNeg = 0;
  let modelsUsed = 0;
  let skipped = 0;
  for (const f of files) {
    const fp = path.join(ROOT, f);
    let faces;
    try {
      faces = extractFacesFromModel(fp);
    } catch (e) {
      // Best-effort: keep going, but report which model was unreadable.
      console.error(`Skipping ${f}: ${e?.message ?? e}`);
      skipped++;
      continue;
    }
    if (!faces || faces.length < 2) {
      skipped++;
      continue;
    }
    const modelName = path.basename(f, '.bbmodel');
    const { positives, negatives } = buildPairsForModel(faces, modelName);
    if (positives.length === 0) {
      skipped++;
      continue;
    }
    for (const [i, j] of positives) {
      writer.write(rowToCsv(modelName, 1, pairFeatures(faces[i], faces[j])) + '\n');
    }
    for (const [i, j] of negatives) {
      writer.write(rowToCsv(modelName, 0, pairFeatures(faces[i], faces[j])) + '\n');
    }
    totalPos += positives.length;
    totalNeg += negatives.length;
    modelsUsed++;
  }

  // Report only after the stream has flushed, so "Wrote" is accurate.
  writer.end((err) => {
    if (err) return; // already reported by the 'error' handler
    const totalPairs = totalPos + totalNeg;
    // Guard against NaN when no model produced any pair.
    const positivePct = totalPairs ? (totalPos / totalPairs) * 100 : 0;
    console.log(`Wrote ${OUT}`);
    console.log(`Models: ${modelsUsed} used, ${skipped} skipped`);
    console.log(`Pairs:  ${totalPos} positive, ${totalNeg} negative (${positivePct.toFixed(1)}% positive)`);
    console.log(`Features: ${FEATURE_KEYS.length}`);
  });
}
|
||||
|
||||
main();
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1,209 @@
|
||||
"""Phase 2: train the share-prediction classifier.
|
||||
|
||||
Setup (one-time):
|
||||
pip install lightgbm pandas scikit-learn
|
||||
|
||||
Run (from this directory):
|
||||
python train_share_classifier.py
|
||||
|
||||
Inputs:
|
||||
pairs.csv (next to this script — generated by extract_pairs.mjs)
|
||||
|
||||
Outputs:
|
||||
share_model.json LightGBM tree dump for the in-plugin JS evaluator
|
||||
eval_report.txt precision/recall sweep + feature importance
|
||||
inference_test_cases.json self-test cases the plugin verifies on load
|
||||
|
||||
Held-out split is by MODEL, not by pair, so the eval numbers reflect
|
||||
generalization to unseen models — not just unseen pairs from already-seen models.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import lightgbm as lgb
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.metrics import (
|
||||
average_precision_score,
|
||||
precision_recall_curve,
|
||||
roc_auc_score,
|
||||
)
|
||||
from sklearn.model_selection import GroupShuffleSplit
|
||||
|
||||
ROOT = Path(__file__).resolve().parent
|
||||
PAIRS = ROOT / "pairs.csv"
|
||||
MODEL_OUT = ROOT / "share_model.json"
|
||||
REPORT_OUT = ROOT / "eval_report.txt"
|
||||
|
||||
CATEGORICAL = ["a_dir", "b_dir", "a_axis", "b_axis"]
|
||||
LABEL = "label"
|
||||
GROUP = "model"
|
||||
|
||||
# Operating thresholds we want precision/recall reported at.
|
||||
THRESHOLDS = [0.50, 0.70, 0.80, 0.85, 0.90, 0.95]
|
||||
|
||||
|
||||
def main() -> None:
    """Train the share classifier and write its report, model and test cases.

    Reads PAIRS, splits it 80/20 by model, trains a LightGBM binary
    classifier with early stopping on the held-out split, then writes:

    * REPORT_OUT - AUC, average precision, a threshold sweep and the top
      features by gain;
    * MODEL_OUT - the compacted tree dump;
    * ``inference_test_cases.json`` - up to 50 held-out rows with their
      predicted probabilities.
    """
    df = pd.read_csv(PAIRS)
    print(f"Loaded {len(df):,} pairs across {df[GROUP].nunique()} models")

    feature_cols = [c for c in df.columns if c not in (LABEL, GROUP)]
    # Cast categoricals on an explicit copy *before* splitting: this avoids
    # assigning into .iloc slices and gives the train and test frames the
    # same category set for every categorical column.
    X = df[feature_cols].copy()
    for c in CATEGORICAL:
        X[c] = X[c].astype("category")
    y = df[LABEL].astype(int)
    groups = df[GROUP]

    # Group-aware 80/20 split — same model never in both train and test.
    splitter = GroupShuffleSplit(n_splits=1, test_size=0.20, random_state=42)
    train_idx, test_idx = next(splitter.split(X, y, groups))
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
    print(
        f"Train: {len(X_train):,} pairs / {df.iloc[train_idx][GROUP].nunique()} models |"
        f" Test: {len(X_test):,} pairs / {df.iloc[test_idx][GROUP].nunique()} models"
    )

    train_set = lgb.Dataset(X_train, label=y_train, categorical_feature=CATEGORICAL)
    valid_set = lgb.Dataset(X_test, label=y_test, categorical_feature=CATEGORICAL, reference=train_set)

    params = {
        "objective": "binary",
        "metric": ["binary_logloss", "auc"],
        "learning_rate": 0.05,
        "num_leaves": 63,
        "min_data_in_leaf": 50,
        "feature_fraction": 0.9,
        "bagging_fraction": 0.8,
        "bagging_freq": 5,
        "verbose": -1,
    }

    model = lgb.train(
        params,
        train_set,
        num_boost_round=600,
        valid_sets=[train_set, valid_set],
        valid_names=["train", "valid"],
        callbacks=[
            lgb.early_stopping(stopping_rounds=30, verbose=True),
            lgb.log_evaluation(period=50),
        ],
    )

    # ---- Eval ----
    y_pred = model.predict(X_test, num_iteration=model.best_iteration)
    auc = roc_auc_score(y_test, y_pred)
    ap = average_precision_score(y_test, y_pred)

    lines = []
    lines.append(f"Held-out models: {df.iloc[test_idx][GROUP].nunique()}")
    lines.append(f"Held-out pairs:  {len(X_test):,}")
    lines.append(f"AUC:             {auc:.4f}")
    lines.append(f"Average prec:    {ap:.4f}")
    lines.append("")
    lines.append("Threshold sweep (held-out pairs):")
    lines.append(f"{'thresh':>8} {'prec':>8} {'recall':>8} {'f1':>8} {'kept%':>8}")
    for t in THRESHOLDS:
        # Rows predicted positive at this threshold.
        mask = (y_pred >= t)
        kept = mask.mean()
        if mask.sum() == 0:
            lines.append(f"{t:>8.2f} {'-':>8} {'-':>8} {'-':>8} {kept * 100:>7.2f}%")
            continue
        tp = ((y_pred >= t) & (y_test == 1)).sum()
        fp = ((y_pred >= t) & (y_test == 0)).sum()
        fn = ((y_pred < t) & (y_test == 1)).sum()
        prec = tp / max(tp + fp, 1)
        rec = tp / max(tp + fn, 1)
        f1 = 2 * prec * rec / max(prec + rec, 1e-9)
        lines.append(f"{t:>8.2f} {prec:>8.4f} {rec:>8.4f} {f1:>8.4f} {kept * 100:>7.2f}%")

    lines.append("")
    lines.append("Top features by gain:")
    gain = model.feature_importance(importance_type="gain")
    names = model.feature_name()
    order = np.argsort(gain)[::-1]
    for i in order[:20]:
        lines.append(f"  {names[i]:>22}  {gain[i]:>14.0f}")

    report = "\n".join(lines)
    REPORT_OUT.write_text(report)
    print()
    print(report)
    print()
    print(f"Wrote {REPORT_OUT}")

    # ---- Export trees in a JS-evaluable form ----
    dump = model.dump_model(num_iteration=model.best_iteration)
    # Preserve feature_names and categorical info for JS eval; trim heavy fields.
    export = {
        "version": 1,
        "objective": "binary",
        "feature_names": dump["feature_names"],
        # Falls back to the CATEGORICAL list when the dump carries no
        # "pandas_categorical_index" entry.
        "categorical_features": [
            dump["feature_names"][i] for i in dump.get("pandas_categorical_index", [])
            if i < len(dump["feature_names"])
        ] if "pandas_categorical_index" in dump else CATEGORICAL,
        "best_iteration": int(model.best_iteration),
        "trees": [_compact_tree(t) for t in dump["tree_info"]],
    }
    MODEL_OUT.write_text(json.dumps(export, separators=(",", ":")))
    print(f"Wrote {MODEL_OUT} ({MODEL_OUT.stat().st_size / 1024:.1f} KB, "
          f"{len(export['trees'])} trees)")

    # ---- Self-test cases for the JS evaluator ----
    # Pick up to 50 random held-out rows, save (feature dict, expected prediction).
    rng = np.random.default_rng(0)
    sample_idx = rng.choice(len(X_test), size=min(50, len(X_test)), replace=False)
    test_cases = []
    for i in sample_idx:
        row = X_test.iloc[i]
        features = {}
        for col in feature_cols:
            v = row[col]
            # Categorical columns are stored as ints, everything else as floats.
            if col in CATEGORICAL:
                v = int(v)
            else:
                v = float(v)
            features[col] = v
        test_cases.append({
            "features": features,
            "expected_prob": float(y_pred[i]),
        })
    tests_out = ROOT / "inference_test_cases.json"
    tests_out.write_text(json.dumps(test_cases, separators=(",", ":")))
    print(f"Wrote {tests_out} ({len(test_cases)} cases)")
|
||||
|
||||
|
||||
def _compact_tree(tree: dict) -> dict:
    """Reduce one entry of LightGBM's ``tree_info`` dump to a compact dict.

    The result carries the tree's ``shrinkage`` (1.0 when the dump has no
    such key) and the compacted node hierarchy under ``root``.
    """
    return {
        "shrinkage": tree.get("shrinkage", 1.0),
        "root": _compact_node(tree["tree_structure"]),
    }
|
||||
|
||||
|
||||
def _compact_node(node: dict) -> dict:
|
||||
if "leaf_value" in node:
|
||||
return {"v": node["leaf_value"]}
|
||||
out = {
|
||||
"f": node["split_feature"],
|
||||
"t": node["threshold"],
|
||||
"d": node["decision_type"], # "<=" or "==" (categorical)
|
||||
"default_left": node.get("default_left", True),
|
||||
"missing_type": node.get("missing_type", "None"),
|
||||
"l": _compact_node(node["left_child"]),
|
||||
"r": _compact_node(node["right_child"]),
|
||||
}
|
||||
return out
|
||||
|
||||
|
||||
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
@ -0,0 +1,55 @@
|
||||
// Validate the JS LightGBM evaluator against LightGBM's own predictions.
|
||||
// Run: node tools/blockbench-uv-packer/ml/validate_js_eval.mjs
|
||||
|
||||
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
|
||||
|
||||
// Directory containing this module; the model and the test cases are read
// from files next to it. fileURLToPath converts the file: URL into a native
// path on every platform: it keeps the leading slash on POSIX, yields a
// drive-letter path on Windows, and percent-decodes characters such as
// spaces. Stripping the leading "/" from URL.pathname by hand turned POSIX
// absolute paths into relative ones.
const ROOT = path.dirname(fileURLToPath(import.meta.url));

// Tree ensemble exported by the training script.
const model = JSON.parse(fs.readFileSync(path.join(ROOT, 'share_model.json'), 'utf8'));

// Saved cases: each holds a `features` dict and the `expected_prob` to match.
const tests = JSON.parse(fs.readFileSync(path.join(ROOT, 'inference_test_cases.json'), 'utf8'));
|
||||
|
||||
/**
 * Walk one exported decision tree and return the value of the leaf reached.
 *
 * Missing values (undefined, null, NaN) are routed the way LightGBM's own
 * predictor routes them, using the node's `missing_type`:
 *   - numerical split, "None": a missing value is treated as 0 and compared
 *     with the threshold;
 *   - numerical split, "Zero": missing values and zeros follow `default_left`;
 *   - numerical split, "NaN": missing values follow `default_left`;
 *   - categorical split ("=="): missing or negative values always go right.
 * A numerical node without `missing_type` keeps the previous behaviour of
 * this evaluator (missing values follow `default_left`).
 *
 * @param {object} node - Root node: either a leaf `{ v }` or a split with
 *   `f` (feature index), `t` (threshold, or "a||b||c" category list),
 *   `d` ("<=" or "=="), `default_left`, `missing_type`, `l`, `r`.
 * @param {Array<number|null|undefined>} x - Feature values indexed by `f`.
 * @returns {number} The `v` of the leaf the walk ends on.
 */
function evalTree(node, x) {
  // LightGBM treats |value| <= 1e-35 as zero when missing_type is "Zero".
  const ZERO_THRESHOLD = 1e-35;

  let current = node;
  while (!('v' in current)) {
    const raw = x[current.f];
    const val = raw === undefined || raw === null ? NaN : Number(raw);
    let goLeft;

    if (current.d === '==') {
      // Categorical split: the value is truncated to an integer category;
      // missing and negative categories never match, so they go right.
      const category = Number.isNaN(val) ? -1 : Math.trunc(val);
      goLeft = category >= 0
        && String(current.t).split('||').map(Number).includes(category);
    } else {
      const missingType = current.missing_type ?? 'NaN';
      // Unless NaN is the declared missing marker, NaN is coerced to 0 first.
      const value = Number.isNaN(val) && missingType !== 'NaN' ? 0 : val;
      const useDefault =
        (missingType === 'Zero' && Math.abs(value) <= ZERO_THRESHOLD)
        || (missingType === 'NaN' && Number.isNaN(value));
      goLeft = useDefault ? current.default_left : value <= current.t;
    }

    current = goLeft ? current.l : current.r;
  }
  return current.v;
}
|
||||
|
||||
/**
 * Score one feature dict with the loaded model.
 *
 * @param {Object<string, number>} features - Values keyed by feature name;
 *   names absent from the dict are passed to the trees as `undefined`.
 * @returns {number} Logistic sigmoid of the summed leaf values of all trees.
 */
function predict(features) {
  // Lay the values out in the model's feature order so nodes can index them.
  const x = model.feature_names.map((name) => features[name]);
  const raw = model.trees.reduce((sum, tree) => sum + evalTree(tree.root, x), 0);
  return 1 / (1 + Math.exp(-raw));
}
|
||||
|
||||
// Score every saved case with the JS evaluator and remember the largest
// absolute deviation from the probability LightGBM produced.
let maxErr = 0;
let worstCase = null;
for (const tc of tests) {
  const got = predict(tc.features);
  const diff = Math.abs(got - tc.expected_prob);
  // NaN never compares greater than maxErr, so a NaN prediction would slip
  // through as a pass; count it as an unbounded error instead.
  const err = Number.isNaN(diff) ? Infinity : diff;
  if (err > maxErr) {
    maxErr = err;
    worstCase = { tc, got };
  }
}

console.log(`tests: ${tests.length}`);
console.log(`max abs error: ${maxErr.toExponential(4)}`);
if (worstCase) {
  console.log(`worst: expected=${worstCase.tc.expected_prob.toFixed(6)} got=${worstCase.got.toFixed(6)}`);
}
// Zero cases validate nothing; do not report that as a pass.
if (tests.length === 0) {
  console.error('FAIL — no test cases to validate');
  process.exit(1);
}
if (maxErr > 0.005) {
  console.error('FAIL — JS evaluator disagrees with LightGBM');
  process.exit(1);
}
console.log('PASS');
|
||||
Loading…
Reference in New Issue