You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

310 lines
11 KiB
JavaScript

// Phase 1: extract face-pair training data for the share-prediction classifier.
//
// Usage:
// node extract_pairs.mjs <path/to/bbmodel/directory>
// MODELS_DIR=path/to/dir node extract_pairs.mjs
// Output: pairs.csv next to this script (~250k rows on a 600-model corpus)
//
// Label: 1 if the two faces share a normalized UV rect within the same texture
// in the source bbmodel, 0 otherwise. Negatives are HARD negatives — sampled
// from the same model + biased toward the same parent or similar dimensions —
// so the classifier learns "looks shareable but artist chose not to," not the
// trivial "obviously different sizes."
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
// Directory that contains this script; the default models location and the
// output path are both resolved relative to it.
const HERE = path.dirname(fileURLToPath(import.meta.url));
// Models directory, in priority order: first CLI argument, then the MODELS_DIR
// environment variable, then the `bbmodel` directory one level above HERE.
// path.resolve makes the result absolute.
const ROOT = path.resolve(process.argv[2] || process.env.MODELS_DIR || path.join(HERE, '..', 'bbmodel'));
// The CSV is always written next to this script, whatever ROOT is.
const OUT = path.join(HERE, 'pairs.csv');
// Fail fast with exit code 1 when the models directory does not exist.
if (!fs.existsSync(ROOT)) {
console.error(`Models dir not found: ${ROOT}`);
console.error('Pass it as an argument or set MODELS_DIR.');
process.exit(1);
}
// Cube face names, in the fixed order that defines their numeric encoding.
const FACE_NAMES = ['north', 'east', 'south', 'west', 'up', 'down'];

// Face name -> its position in FACE_NAMES (north = 0 ... down = 5).
const DIR_IDX = {};
FACE_NAMES.forEach((faceName, position) => {
  DIR_IDX[faceName] = position;
});

// Face name -> the name of the face paired with it as its opposite.
const OPPOSITE = {
  north: 'south',
  south: 'north',
  east: 'west',
  west: 'east',
  up: 'down',
  down: 'up',
};

// Face name -> axis label; both faces of an opposite pair share one label.
const AXIS = {
  north: 'z',
  south: 'z',
  east: 'x',
  west: 'x',
  up: 'y',
  down: 'y',
};

// Upper bound on the positive pairs kept for a single model.
const TARGET_POSITIVES_PER_MODEL = 200;

// Negatives requested per positive pair.
const NEG_RATIO = 1.0;
// Fixed seed for the script-local generator below.
const RNG_SEED = 42;

// Current 32-bit generator state; every rand() call replaces it.
let rngState = RNG_SEED;

/**
 * Step the 32-bit linear congruential generator and return the next draw.
 *
 * @returns {number} The new state divided by 2^32, i.e. a value in [0, 1).
 */
function rand() {
  // Math.imul yields the low 32 bits of the product; `>>> 0` then reduces the
  // sum modulo 2^32 and reinterprets it as an unsigned integer.
  const nextState = (Math.imul(rngState, 1664525) + 1013904223) >>> 0;
  rngState = nextState;
  return nextState / 0x100000000;
}
/**
 * Shuffle an array in place (Fisher-Yates), drawing from the seeded rand().
 *
 * @param {Array} arr - Array to permute; it is modified directly.
 * @returns {Array} The same array instance that was passed in.
 */
function shuffle(arr) {
  let last = arr.length - 1;
  while (last > 0) {
    // Pick a slot in [0, last] and exchange it with the current tail slot.
    const pick = Math.floor(rand() * (last + 1));
    const held = arr[last];
    arr[last] = arr[pick];
    arr[pick] = held;
    last -= 1;
  }
  return arr;
}
/**
 * Recursively walk an outliner tree and record, for each string entry, the
 * group that directly contains it.
 *
 * String entries are stored in `out` under their own value. Entries that are
 * objects with a `children` array are treated as groups and descended into;
 * anything else is ignored.
 *
 * @param {Array} nodes - Outliner entries at the current level.
 * @param {string|null} parentId - Id of the group owning `nodes`.
 * @param {number} parentDepth - Depth recorded for strings found at this level.
 * @param {Map<string, {parentId: (string|null), depth: number}>} out - Filled in place.
 * @param {{n: number}} groupSeq - Shared counter used to name groups lacking a uuid.
 */
function buildCubeGroupMap(nodes, parentId, parentDepth, out, groupSeq) {
  for (const entry of nodes) {
    if (typeof entry === 'string') {
      out.set(entry, { parentId, depth: parentDepth });
      continue;
    }
    if (!entry || !Array.isArray(entry.children)) continue;

    // Groups without a uuid get a synthetic id numbered in visit order.
    const groupId = entry.uuid || `group_${groupSeq.n++}`;
    buildCubeGroupMap(entry.children, groupId, parentDepth + 1, out, groupSeq);
  }
}
/**
 * Load one .bbmodel file and flatten it into per-face records.
 *
 * Returns null when the model's `meta.box_uv` flag is true. Otherwise each
 * face of each cube element that has a `uv` rect of non-zero width and height
 * and a texture other than null/undefined/false yields one record. A record
 * combines the cube's size, centre, rotation and outliner group with the
 * face's UV size, flip flags, texture key and share key.
 *
 * @param {string} filePath - Path of the .bbmodel JSON file.
 * @returns {Array<object>|null} Face records, or null for box-UV models.
 * @throws If the file cannot be read or is not valid JSON.
 */
function extractFacesFromModel(filePath) {
  const model = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  if (model.meta?.box_uv === true) return null;

  const elementList = model.elements || [];
  const groupOfCube = new Map();
  buildCubeGroupMap(model.outliner || [], null, 0, groupOfCube, { n: 0 });

  const records = [];
  for (const el of elementList) {
    // An element with no `type` counts as a cube; any other explicit type is skipped.
    const isOtherType = el.type && el.type !== 'cube';
    if (isOtherType || !el.faces) continue;

    const lo = el.from || [0, 0, 0];
    const hi = el.to || [0, 0, 0];
    const rot = el.rotation || [0, 0, 0];
    const isRotated = rot[0] !== 0 || rot[1] !== 0 || rot[2] !== 0;
    // Cubes absent from the outliner map fall back to a synthetic root group.
    const group = groupOfCube.get(el.uuid) || { parentId: '__root__', depth: 0 };

    // Fields that are identical for every face of this cube.
    const cubeFields = {
      cube: el.uuid,
      cubeName: el.name || '',
      parentId: group.parentId,
      depth: group.depth,
      cubeW: Math.abs(hi[0] - lo[0]),
      cubeH: Math.abs(hi[1] - lo[1]),
      cubeD: Math.abs(hi[2] - lo[2]),
      cx: (lo[0] + hi[0]) / 2,
      cy: (lo[1] + hi[1]) / 2,
      cz: (lo[2] + hi[2]) / 2,
      rotX: rot[0],
      rotY: rot[1],
      rotZ: rot[2],
      hasRot: isRotated ? 1 : 0,
    };

    for (const dir of FACE_NAMES) {
      const face = el.faces[dir];
      if (!face || !face.uv) continue;

      const tex = face.texture;
      const isUntextured = tex === undefined || tex === null || tex === false;
      if (isUntextured) continue;

      const [u1, v1, u2, v2] = face.uv;
      const uvW = Math.abs(u2 - u1);
      const uvH = Math.abs(v2 - v1);
      if (uvW === 0 || uvH === 0) continue;

      // The share key ignores flip direction: it uses the rect's minimum
      // corner plus its absolute size, rounded to four decimals.
      const rect = [Math.min(u1, u2), Math.min(v1, v2), uvW, uvH];
      const texKey = String(tex);
      const shareKey = `${texKey}|${rect.map((n) => n.toFixed(4)).join(',')}`;

      records.push({
        ...cubeFields,
        dir,
        uvW,
        uvH,
        flipX: u2 < u1 ? 1 : 0,
        flipY: v2 < v1 ? 1 : 0,
        texKey,
        shareKey,
      });
    }
  }
  return records;
}
/**
 * Compute the feature record for an ordered pair of face records.
 *
 * The result has one property per feature column. `a_axis` and `b_axis` hold
 * axis labels looked up in AXIS; every other property is numeric, with
 * boolean-style features stored as 0 or 1.
 *
 * @param {object} a - First face record.
 * @param {object} b - Second face record.
 * @returns {object} Feature name -> value.
 */
function pairFeatures(a, b) {
  const flag = (condition) => (condition ? 1 : 0);
  const gap = (p, q) => Math.abs(p - q);

  const areaOfA = a.uvW * a.uvH;
  const areaOfB = b.uvW * b.uvH;
  const smallerArea = Math.min(areaOfA, areaOfB);
  const largerArea = Math.max(areaOfA, areaOfB);

  const axisOfA = AXIS[a.dir];
  const axisOfB = AXIS[b.dir];

  // Offset between the two cube centres.
  const dx = a.cx - b.cx;
  const dy = a.cy - b.cy;
  const dz = a.cz - b.cz;

  const sameCubeSize = a.cubeW === b.cubeW && a.cubeH === b.cubeH && a.cubeD === b.cubeD;
  const sameFlip = a.flipX === b.flipX && a.flipY === b.flipY;
  const sameRotation = a.rotX === b.rotX && a.rotY === b.rotY && a.rotZ === b.rotZ;

  return {
    a_dir: DIR_IDX[a.dir],
    b_dir: DIR_IDX[b.dir],
    a_uvW: a.uvW,
    a_uvH: a.uvH,
    b_uvW: b.uvW,
    b_uvH: b.uvH,
    abs_w_diff: gap(a.uvW, b.uvW),
    abs_h_diff: gap(a.uvH, b.uvH),
    // Width-vs-height differences: small when the rects match after a quarter turn.
    swap_w_diff: gap(a.uvW, b.uvH),
    swap_h_diff: gap(a.uvH, b.uvW),
    area_min: smallerArea,
    area_max: largerArea,
    area_ratio: smallerArea / largerArea,
    aspect_diff: gap(a.uvW / a.uvH, b.uvW / b.uvH),
    a_cubeW: a.cubeW,
    a_cubeH: a.cubeH,
    a_cubeD: a.cubeD,
    b_cubeW: b.cubeW,
    b_cubeH: b.cubeH,
    b_cubeD: b.cubeD,
    cube_dim_match: flag(sameCubeSize),
    cube_w_diff: gap(a.cubeW, b.cubeW),
    cube_h_diff: gap(a.cubeH, b.cubeH),
    cube_d_diff: gap(a.cubeD, b.cubeD),
    same_cube: flag(a.cube === b.cube),
    same_parent: flag(a.parentId === b.parentId),
    direction_match: flag(a.dir === b.dir),
    direction_opposite: flag(OPPOSITE[a.dir] === b.dir),
    same_axis: flag(axisOfA === axisOfB),
    // Categorical axis labels, not numbers.
    a_axis: axisOfA,
    b_axis: axisOfB,
    flip_match: flag(sameFlip),
    has_rot_either: flag(a.hasRot || b.hasRot),
    rot_match: flag(sameRotation),
    cube_dist: Math.sqrt(dx * dx + dy * dy + dz * dz),
    same_texture: flag(a.texKey === b.texKey),
    depth_diff: gap(a.depth, b.depth),
  };
}
// Feature columns in the order they are written to the CSV, after the
// leading `model` and `label` columns.
const FEATURE_KEYS = [
  // Face directions (numeric codes)
  'a_dir',
  'b_dir',
  // UV rect sizes and their differences
  'a_uvW',
  'a_uvH',
  'b_uvW',
  'b_uvH',
  'abs_w_diff',
  'abs_h_diff',
  'swap_w_diff',
  'swap_h_diff',
  'area_min',
  'area_max',
  'area_ratio',
  'aspect_diff',
  // Cube dimensions and their differences
  'a_cubeW',
  'a_cubeH',
  'a_cubeD',
  'b_cubeW',
  'b_cubeH',
  'b_cubeD',
  'cube_dim_match',
  'cube_w_diff',
  'cube_h_diff',
  'cube_d_diff',
  // Ownership
  'same_cube',
  'same_parent',
  // Direction relationships
  'direction_match',
  'direction_opposite',
  'same_axis',
  'a_axis',
  'b_axis',
  // Flip and rotation
  'flip_match',
  'has_rot_either',
  'rot_match',
  // Distance, texture, outliner depth
  'cube_dist',
  'same_texture',
  'depth_diff',
];
// Integer code for each axis label.
const AXIS_IDX = {
  x: 0,
  y: 1,
  z: 2,
};

/**
 * Look up the integer code of an axis label.
 *
 * @param {string} v - Axis label.
 * @returns {number|undefined} The code from AXIS_IDX; undefined for labels it does not list.
 */
function encodeAxis(v) {
  const code = AXIS_IDX[v];
  return code;
}
/**
 * Serialize one labelled feature record as a single CSV line (no newline).
 *
 * Cells are: the quoted model name, the label, then one cell per entry of
 * FEATURE_KEYS in that order. Integer numbers are written as-is, other numbers
 * with four decimals, and the two axis features are converted to their integer
 * codes via encodeAxis. A missing value produces an empty cell.
 *
 * @param {string} modelName - Model identifier for the first column.
 * @param {number} label - Class label for the second column.
 * @param {object} feat - Feature name -> value, as returned by pairFeatures.
 * @returns {string} Comma-joined CSV row.
 */
function rowToCsv(modelName, label, feat) {
  // Quote the CSV way: wrap in double quotes and double any embedded quote.
  // JSON-style escaping (\" and \\) is not understood by standard CSV readers
  // and would split or corrupt rows for names containing quotes or backslashes.
  const quotedName = `"${String(modelName).replace(/"/g, '""')}"`;

  const cells = [quotedName, label];
  for (const key of FEATURE_KEYS) {
    const isAxisColumn = key === 'a_axis' || key === 'b_axis';
    const value = isAxisColumn ? encodeAxis(feat[key]) : feat[key];
    if (typeof value === 'number' && !Number.isInteger(value)) {
      cells.push(value.toFixed(4));
    } else {
      cells.push(value);
    }
  }
  return cells.join(',');
}
/**
 * Build the positive and negative face-index pairs for one model.
 *
 * Positives are all pairs of faces with an identical `shareKey`, shuffled and
 * capped at TARGET_POSITIVES_PER_MODEL. Negatives are sampled with the seeded
 * rand(): both faces come from the same texture, the partner is preferably
 * from the same parent group, pairs with very different UV sizes are mostly
 * rejected, and a pair is never a positive. Each unordered pair is emitted at
 * most once, so the output holds no repeated negative examples.
 *
 * The negative target is never lower than the number of kept positives, so a
 * NEG_RATIO below 1 has no effect. Sampling stops after 30 attempts per
 * requested negative, so fewer negatives than requested may be returned.
 *
 * @param {Array<object>} faces - Face records (shareKey, texKey, parentId, uvW, uvH are read).
 * @param {string} modelName - Passed through unchanged in the result.
 * @returns {{positives: number[][], negatives: number[][], faces: Array<object>, modelName: string}}
 *   Pairs are [i, j] indices into `faces`.
 */
function buildPairsForModel(faces, modelName) {
  // Order-independent identifier for an unordered pair of face indices.
  const pairKey = (i, j) => (i < j ? `${i},${j}` : `${j},${i}`);

  // Group face indices by share key.
  const shareMap = new Map();
  for (let i = 0; i < faces.length; i++) {
    const key = faces[i].shareKey;
    if (!shareMap.has(key)) shareMap.set(key, []);
    shareMap.get(key).push(i);
  }

  // Positive pairs: every (i, j) within a share group of size >= 2.
  const positives = [];
  for (const idxs of shareMap.values()) {
    if (idxs.length < 2) continue;
    for (let i = 0; i < idxs.length; i++) {
      for (let j = i + 1; j < idxs.length; j++) {
        positives.push([idxs[i], idxs[j]]);
      }
    }
  }
  shuffle(positives);
  const positivesCapped = positives.slice(0, TARGET_POSITIVES_PER_MODEL);

  // Candidate pool for negatives, keyed by texture.
  const facesByTex = new Map();
  for (let i = 0; i < faces.length; i++) {
    const tex = faces[i].texKey;
    if (!facesByTex.has(tex)) facesByTex.set(tex, []);
    facesByTex.get(tex).push(i);
  }

  // Built from the uncapped list: a positive dropped by the cap must still
  // never be emitted as a negative.
  const positiveSet = new Set(positives.map(([i, j]) => pairKey(i, j)));
  const targetNegatives = Math.max(
    positivesCapped.length,
    Math.round(positivesCapped.length * NEG_RATIO),
  );
  const negatives = [];
  // Pairs already emitted as negatives, so no pair is written twice.
  const seenNegatives = new Set();
  let attempts = 0;
  const maxAttempts = targetNegatives * 30;

  while (negatives.length < targetNegatives && attempts < maxAttempts) {
    attempts++;

    // Pick a face uniformly, then a partner from the same texture pool.
    const a = Math.floor(rand() * faces.length);
    const fa = faces[a];
    const pool = facesByTex.get(fa.texKey);
    if (!pool || pool.length < 2) continue;

    // 60% of the time prefer a partner with the same parent when one exists;
    // otherwise take any face from the pool.
    let b;
    if (rand() < 0.6) {
      const sameParent = pool.filter((idx) => idx !== a && faces[idx].parentId === fa.parentId);
      if (sameParent.length) {
        b = sameParent[Math.floor(rand() * sameParent.length)];
      } else {
        b = pool[Math.floor(rand() * pool.length)];
      }
    } else {
      b = pool[Math.floor(rand() * pool.length)];
    }
    if (b === a) continue;

    const key = pairKey(a, b);
    if (positiveSet.has(key)) continue;
    if (seenNegatives.has(key)) continue;

    // Keep only about 20% of blatantly mismatched pairs (short side of one
    // rect under a tenth of the long side of the other) so the negatives stay hard.
    const fb = faces[b];
    const minA = Math.min(fa.uvW, fa.uvH);
    const maxA = Math.max(fa.uvW, fa.uvH);
    const minB = Math.min(fb.uvW, fb.uvH);
    const maxB = Math.max(fb.uvW, fb.uvH);
    const sizeRatio = Math.min(minA / maxB, minB / maxA);
    if (sizeRatio < 0.1 && rand() > 0.2) continue;

    // Mark as seen only once accepted, so a pair rejected by the random size
    // filter can still be drawn again later.
    seenNegatives.add(key);
    negatives.push([a, b]);
  }

  return { positives: positivesCapped, negatives, faces, modelName };
}
/**
 * Script driver: extract pairs from every .bbmodel file in ROOT and write
 * them to OUT as CSV, then print a summary.
 *
 * A model is skipped when it cannot be read or parsed (a warning is printed),
 * when extraction returns null or fewer than two faces, or when it yields no
 * positive pairs.
 */
function main() {
  // readdirSync returns entries in whatever order the OS yields them. All
  // models draw from one shared seeded RNG, so the processing order decides
  // which samples each model gets; sorting keeps the output reproducible.
  const files = fs
    .readdirSync(ROOT)
    .filter((f) => f.endsWith('.bbmodel'))
    .sort();

  const writer = fs.createWriteStream(OUT);
  writer.write(`${['model', 'label', ...FEATURE_KEYS].join(',')}\n`);

  let totalPos = 0;
  let totalNeg = 0;
  let modelsUsed = 0;
  let skipped = 0;

  for (const f of files) {
    const fp = path.join(ROOT, f);
    let faces;
    try {
      faces = extractFacesFromModel(fp);
    } catch (e) {
      // Best effort: one unreadable model must not abort the run, but say why it was dropped.
      console.warn(`Skipping ${f}: ${e?.message ?? e}`);
      skipped++;
      continue;
    }
    if (!faces || faces.length < 2) {
      skipped++;
      continue;
    }

    const modelName = path.basename(f, '.bbmodel');
    const { positives, negatives } = buildPairsForModel(faces, modelName);
    if (positives.length === 0) {
      skipped++;
      continue;
    }

    for (const [i, j] of positives) {
      writer.write(`${rowToCsv(modelName, 1, pairFeatures(faces[i], faces[j]))}\n`);
    }
    for (const [i, j] of negatives) {
      writer.write(`${rowToCsv(modelName, 0, pairFeatures(faces[i], faces[j]))}\n`);
    }
    totalPos += positives.length;
    totalNeg += negatives.length;
    modelsUsed++;
  }
  writer.end();

  // Avoid printing "NaN%" when nothing was written.
  const totalPairs = totalPos + totalNeg;
  const positivePct = totalPairs > 0 ? ((totalPos / totalPairs) * 100).toFixed(1) : '0.0';

  console.log(`Wrote ${OUT}`);
  console.log(`Models: ${modelsUsed} used, ${skipped} skipped`);
  console.log(`Pairs: ${totalPos} positive, ${totalNeg} negative (${positivePct}% positive)`);
  console.log(`Features: ${FEATURE_KEYS.length}`);
}
// Script entry point: run the extraction as soon as the module is executed.
main();