// Phase 1: extract face-pair training data for the share-prediction classifier.
//
// Usage:
//   node extract_pairs.mjs
//   node extract_pairs.mjs path/to/dir
//   MODELS_DIR=path/to/dir node extract_pairs.mjs
// Output: pairs.csv next to this script (~250k rows on a 600-model corpus)
//
// Label: 1 if the two faces share a normalized UV rect within the same texture
// in the source bbmodel, 0 otherwise. Negatives are HARD negatives — sampled
// from the same model + biased toward the same parent or similar dimensions —
// so the classifier learns "looks shareable but artist chose not to," not the
// trivial "obviously different sizes."

import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';

const HERE = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.resolve(process.argv[2] || process.env.MODELS_DIR || path.join(HERE, '..', 'bbmodel'));
const OUT = path.join(HERE, 'pairs.csv');

if (!fs.existsSync(ROOT)) {
  console.error(`Models dir not found: ${ROOT}`);
  console.error('Pass it as an argument or set MODELS_DIR.');
  process.exit(1);
}

const FACE_NAMES = ['north', 'east', 'south', 'west', 'up', 'down'];
const DIR_IDX = Object.fromEntries(FACE_NAMES.map((n, i) => [n, i]));
const OPPOSITE = { north: 'south', south: 'north', east: 'west', west: 'east', up: 'down', down: 'up' };
const AXIS = { north: 'z', south: 'z', east: 'x', west: 'x', up: 'y', down: 'y' };

const TARGET_POSITIVES_PER_MODEL = 200;
const NEG_RATIO = 1.0; // negatives per positive
const RNG_SEED = 42;

// Seeded 32-bit linear congruential generator, so sampling is repeatable
// from run to run (Math.random cannot be seeded).
let rngState = RNG_SEED;

/**
 * Advance the seeded generator.
 * @returns {number} A float in [0, 1).
 */
function rand() {
  rngState = (rngState * 1664525 + 1013904223) >>> 0;
  return rngState / 0x100000000;
}

/**
 * Fisher-Yates shuffle driven by the seeded generator.
 * @param {Array} arr - Array to shuffle; it is modified in place.
 * @returns {Array} The same array instance.
 */
function shuffle(arr) {
  for (let i = arr.length - 1; i > 0; i--) {
    const j = Math.floor(rand() * (i + 1));
    [arr[i], arr[j]] = [arr[j], arr[i]];
  }
  return arr;
}

/**
 * Walk an outliner tree and record, for every cube id, its enclosing group.
 *
 * String entries are treated as cube ids and stored with the current group
 * id and depth. Entries that carry a `children` array are treated as groups
 * and recursed into one level deeper. Anything else is ignored.
 *
 * @param {Array} nodes - Outliner entries at the current level.
 * @param {?string} parentId - Id of the enclosing group (null at the top level).
 * @param {number} parentDepth - Nesting depth of the enclosing group.
 * @param {Map<string, {parentId: ?string, depth: number}>} out - Filled in place.
 * @param {{n: number}} groupSeq - Counter used to name groups that lack a uuid.
 */
function buildCubeGroupMap(nodes, parentId, parentDepth, out, groupSeq) {
  for (const node of nodes) {
    if (typeof node === 'string') {
      out.set(node, { parentId, depth: parentDepth });
    } else if (node && Array.isArray(node.children)) {
      const id = node.uuid || `group_${groupSeq.n++}`;
      buildCubeGroupMap(node.children, id, parentDepth + 1, out, groupSeq);
    }
  }
}

/**
 * Check that a face's `uv` is an array whose first four entries are finite numbers.
 * @param {*} uv - Raw `uv` value from the model file.
 * @returns {boolean} True when the rect can be used for feature extraction.
 */
function isValidUvRect(uv) {
  return Array.isArray(uv) && uv.length >= 4 && uv.slice(0, 4).every((v) => Number.isFinite(v));
}

/**
 * Read one .bbmodel file and flatten it into per-face records.
 *
 * Elements whose `type` is set to something other than 'cube' are skipped,
 * as are faces without a texture, faces with a malformed `uv`, and faces
 * whose UV rect has zero width or height.
 *
 * @param {string} filePath - Path of the .bbmodel JSON file.
 * @returns {?Array<object>} Face records, or null when `meta.box_uv` is true.
 * @throws If the file cannot be read or is not valid JSON.
 */
function extractFacesFromModel(filePath) {
  const data = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  if (data.meta?.box_uv === true) return null;

  const elements = data.elements || [];
  const cubeGroup = new Map();
  buildCubeGroupMap(data.outliner || [], null, 0, cubeGroup, { n: 0 });

  const faces = [];
  for (const el of elements) {
    if (el.type && el.type !== 'cube') continue;
    if (!el.faces) continue;

    const from = el.from || [0, 0, 0];
    const to = el.to || [0, 0, 0];
    const cubeW = Math.abs(to[0] - from[0]);
    const cubeH = Math.abs(to[1] - from[1]);
    const cubeD = Math.abs(to[2] - from[2]);
    const center = [
      (from[0] + to[0]) / 2,
      (from[1] + to[1]) / 2,
      (from[2] + to[2]) / 2,
    ];
    const rot = el.rotation || [0, 0, 0];
    // Cubes that do not appear in the outliner fall back to a synthetic root.
    const grp = cubeGroup.get(el.uuid) || { parentId: '__root__', depth: 0 };

    for (const fname of FACE_NAMES) {
      const face = el.faces[fname];
      // A malformed uv would otherwise produce NaN features (or throw and
      // discard the whole model), so only the offending face is skipped.
      if (!face || !isValidUvRect(face.uv)) continue;
      const tex = face.texture;
      if (tex === null || tex === undefined || tex === false) continue;

      const [x1, y1, x2, y2] = face.uv;
      const w = Math.abs(x2 - x1);
      const h = Math.abs(y2 - y1);
      if (w === 0 || h === 0) continue;

      // Normalize the rect (min corner + size) so flipped UVs map to the same key.
      const minX = Math.min(x1, x2);
      const minY = Math.min(y1, y2);
      const texKey = String(tex);
      const shareKey = `${texKey}|${minX.toFixed(4)},${minY.toFixed(4)},${w.toFixed(4)},${h.toFixed(4)}`;

      faces.push({
        cube: el.uuid,
        cubeName: el.name || '',
        parentId: grp.parentId,
        depth: grp.depth,
        cubeW,
        cubeH,
        cubeD,
        cx: center[0],
        cy: center[1],
        cz: center[2],
        rotX: rot[0],
        rotY: rot[1],
        rotZ: rot[2],
        hasRot: rot[0] !== 0 || rot[1] !== 0 || rot[2] !== 0 ? 1 : 0,
        dir: fname,
        uvW: w,
        uvH: h,
        flipX: x2 < x1 ? 1 : 0,
        flipY: y2 < y1 ? 1 : 0,
        texKey,
        shareKey,
      });
    }
  }
  return faces;
}

/**
 * Compute the feature record for an ordered pair of faces.
 * @param {object} a - First face record (from extractFacesFromModel).
 * @param {object} b - Second face record.
 * @returns {object} One value per entry of FEATURE_KEYS.
 */
function pairFeatures(a, b) {
  const aspectA = a.uvW / a.uvH;
  const aspectB = b.uvW / b.uvH;
  const areaA = a.uvW * a.uvH;
  const areaB = b.uvW * b.uvH;
  const dx = a.cx - b.cx;
  const dy = a.cy - b.cy;
  const dz = a.cz - b.cz;
  return {
    a_dir: DIR_IDX[a.dir],
    b_dir: DIR_IDX[b.dir],
    a_uvW: a.uvW,
    a_uvH: a.uvH,
    b_uvW: b.uvW,
    b_uvH: b.uvH,
    abs_w_diff: Math.abs(a.uvW - b.uvW),
    abs_h_diff: Math.abs(a.uvH - b.uvH),
    swap_w_diff: Math.abs(a.uvW - b.uvH), // useful when shapes match after rotation
    swap_h_diff: Math.abs(a.uvH - b.uvW),
    area_min: Math.min(areaA, areaB),
    area_max: Math.max(areaA, areaB),
    area_ratio: Math.min(areaA, areaB) / Math.max(areaA, areaB),
    aspect_diff: Math.abs(aspectA - aspectB),
    a_cubeW: a.cubeW,
    a_cubeH: a.cubeH,
    a_cubeD: a.cubeD,
    b_cubeW: b.cubeW,
    b_cubeH: b.cubeH,
    b_cubeD: b.cubeD,
    cube_dim_match: (a.cubeW === b.cubeW && a.cubeH === b.cubeH && a.cubeD === b.cubeD) ? 1 : 0,
    cube_w_diff: Math.abs(a.cubeW - b.cubeW),
    cube_h_diff: Math.abs(a.cubeH - b.cubeH),
    cube_d_diff: Math.abs(a.cubeD - b.cubeD),
    same_cube: a.cube === b.cube ? 1 : 0,
    same_parent: a.parentId === b.parentId ? 1 : 0,
    direction_match: a.dir === b.dir ? 1 : 0,
    direction_opposite: OPPOSITE[a.dir] === b.dir ? 1 : 0,
    same_axis: AXIS[a.dir] === AXIS[b.dir] ? 1 : 0,
    a_axis: AXIS[a.dir], // categorical
    b_axis: AXIS[b.dir],
    flip_match: (a.flipX === b.flipX && a.flipY === b.flipY) ? 1 : 0,
    has_rot_either: (a.hasRot || b.hasRot) ? 1 : 0,
    rot_match: (a.rotX === b.rotX && a.rotY === b.rotY && a.rotZ === b.rotZ) ? 1 : 0,
    cube_dist: Math.sqrt(dx * dx + dy * dy + dz * dz),
    same_texture: a.texKey === b.texKey ? 1 : 0,
    depth_diff: Math.abs(a.depth - b.depth),
  };
}

// Column order of the feature part of each CSV row.
const FEATURE_KEYS = [
  'a_dir', 'b_dir',
  'a_uvW', 'a_uvH', 'b_uvW', 'b_uvH',
  'abs_w_diff', 'abs_h_diff', 'swap_w_diff', 'swap_h_diff',
  'area_min', 'area_max', 'area_ratio', 'aspect_diff',
  'a_cubeW', 'a_cubeH', 'a_cubeD', 'b_cubeW', 'b_cubeH', 'b_cubeD',
  'cube_dim_match', 'cube_w_diff', 'cube_h_diff', 'cube_d_diff',
  'same_cube', 'same_parent',
  'direction_match', 'direction_opposite', 'same_axis',
  'a_axis', 'b_axis',
  'flip_match', 'has_rot_either', 'rot_match',
  'cube_dist', 'same_texture', 'depth_diff',
];

const AXIS_IDX = { x: 0, y: 1, z: 2 };

/**
 * Map an axis letter to its numeric code.
 * @param {string} v - 'x', 'y' or 'z'.
 * @returns {number} 0, 1 or 2.
 */
function encodeAxis(v) {
  return AXIS_IDX[v];
}

/**
 * Quote a text value as a CSV field: wrap it in double quotes and double any
 * embedded double quote. JSON-style backslash escapes are not understood by
 * CSV readers, so they must not be used here.
 * @param {string} text - Raw field value.
 * @returns {string} Quoted CSV field.
 */
function csvQuote(text) {
  return `"${String(text).replace(/"/g, '""')}"`;
}

/**
 * Serialize one labelled pair as a CSV line (without the trailing newline).
 * @param {string} modelName - Model the pair came from; always written quoted.
 * @param {number} label - 1 for a sharing pair, 0 for a negative.
 * @param {object} feat - Feature record from pairFeatures.
 * @returns {string} Comma-joined row: model, label, then FEATURE_KEYS in order.
 */
function rowToCsv(modelName, label, feat) {
  const cells = [csvQuote(modelName), label];
  for (const k of FEATURE_KEYS) {
    let v = feat[k];
    if (k === 'a_axis' || k === 'b_axis') v = encodeAxis(v);
    // Integers are written as-is; other numbers are rounded to 4 decimals.
    cells.push(typeof v === 'number' ? (Number.isInteger(v) ? v : v.toFixed(4)) : v);
  }
  return cells.join(',');
}

/**
 * Order-independent key for a pair of face indices.
 * @param {number} a - First face index.
 * @param {number} b - Second face index.
 * @returns {string} "min,max".
 */
function pairKey(a, b) {
  return a < b ? `${a},${b}` : `${b},${a}`;
}

/**
 * Build the positive and hard-negative index pairs for one model.
 *
 * Positives are all pairs of faces with an identical share key, shuffled and
 * capped at TARGET_POSITIVES_PER_MODEL. Negatives are sampled from faces on
 * the same texture (preferring the same parent group), never coincide with
 * any positive pair, and are unique within the model.
 *
 * @param {Array<object>} faces - Face records from extractFacesFromModel.
 * @param {string} modelName - Passed through to the result unchanged.
 * @returns {{positives: number[][], negatives: number[][], faces: Array<object>, modelName: string}}
 */
function buildPairsForModel(faces, modelName) {
  // Group faces by share key (only considers within-texture sharing)
  const shareMap = new Map();
  for (let i = 0; i < faces.length; i++) {
    if (!shareMap.has(faces[i].shareKey)) shareMap.set(faces[i].shareKey, []);
    shareMap.get(faces[i].shareKey).push(i);
  }

  // Positive pairs: every (i, j) within a share group of size >= 2
  const positives = [];
  for (const idxs of shareMap.values()) {
    if (idxs.length < 2) continue;
    for (let i = 0; i < idxs.length; i++) {
      for (let j = i + 1; j < idxs.length; j++) {
        positives.push([idxs[i], idxs[j]]);
      }
    }
  }
  shuffle(positives);
  const positivesCapped = positives.slice(0, TARGET_POSITIVES_PER_MODEL);

  // Hard negatives: same texture, biased toward same-parent or close-dim faces.
  // Build candidate pool keyed by texture for fast lookup.
  const facesByTex = new Map();
  for (let i = 0; i < faces.length; i++) {
    const t = faces[i].texKey;
    if (!facesByTex.has(t)) facesByTex.set(t, []);
    facesByTex.get(t).push(i);
  }

  // Built from ALL positives (not only the capped ones) so that a true
  // sharing pair can never be emitted with label 0.
  const positiveSet = new Set(positives.map(([a, b]) => pairKey(a, b)));
  const targetNegatives = Math.max(positivesCapped.length, Math.round(positivesCapped.length * NEG_RATIO));
  const negatives = [];
  // Tracks emitted negatives so the same pair is not written twice.
  const seenNegatives = new Set();
  let attempts = 0;
  const maxAttempts = targetNegatives * 30;

  while (negatives.length < targetNegatives && attempts < maxAttempts) {
    attempts++;
    // Pick a face uniformly, then pick a partner from the same texture pool,
    // preferring same-parent.
    const a = Math.floor(rand() * faces.length);
    const fa = faces[a];
    const pool = facesByTex.get(fa.texKey);
    if (!pool || pool.length < 2) continue;

    // 60% chance: same-parent partner if possible; else any.
    let b;
    if (rand() < 0.6) {
      const sameParent = pool.filter((idx) => idx !== a && faces[idx].parentId === fa.parentId);
      if (sameParent.length) {
        b = sameParent[Math.floor(rand() * sameParent.length)];
      } else {
        b = pool[Math.floor(rand() * pool.length)];
      }
    } else {
      b = pool[Math.floor(rand() * pool.length)];
    }
    if (b === a) continue;

    const key = pairKey(a, b);
    if (positiveSet.has(key) || seenNegatives.has(key)) continue;

    // Optional: filter out blatant negatives (very different sizes) to avoid trivial training
    const fb = faces[b];
    const minA = Math.min(fa.uvW, fa.uvH);
    const maxA = Math.max(fa.uvW, fa.uvH);
    const minB = Math.min(fb.uvW, fb.uvH);
    const maxB = Math.max(fb.uvW, fb.uvH);
    const sizeRatio = Math.min(minA / maxB, minB / maxA);
    if (sizeRatio < 0.1) {
      // Allow some, but not too many
      if (rand() > 0.2) continue;
    }

    seenNegatives.add(key);
    negatives.push([a, b]);
  }

  return { positives: positivesCapped, negatives, faces, modelName };
}

/**
 * Process every .bbmodel file in ROOT and write the labelled pairs to OUT.
 * Resolves once the output file has been fully written.
 * @returns {Promise<void>}
 */
async function main() {
  // Directory listing order is platform-dependent; sorting keeps the seeded
  // sampling (and therefore the output) identical across machines.
  const files = fs.readdirSync(ROOT).filter((f) => f.endsWith('.bbmodel')).sort();

  const writer = fs.createWriteStream(OUT);
  const finished = new Promise((resolve, reject) => {
    writer.once('finish', resolve);
    writer.once('error', reject);
  });
  writer.write(`${['model', 'label', ...FEATURE_KEYS].join(',')}\n`);

  let totalPos = 0;
  let totalNeg = 0;
  let modelsUsed = 0;
  let skipped = 0;

  for (const f of files) {
    const fp = path.join(ROOT, f);
    let faces;
    try {
      faces = extractFacesFromModel(fp);
    } catch (err) {
      // Best effort: one unreadable model must not abort the run, but say so.
      console.warn(`Skipping ${f}: ${err.message}`);
      skipped++;
      continue;
    }
    if (!faces || faces.length < 2) {
      skipped++;
      continue;
    }

    const modelName = path.basename(f, '.bbmodel');
    const { positives, negatives } = buildPairsForModel(faces, modelName);
    if (positives.length === 0) {
      skipped++;
      continue;
    }

    for (const [i, j] of positives) {
      writer.write(`${rowToCsv(modelName, 1, pairFeatures(faces[i], faces[j]))}\n`);
    }
    for (const [i, j] of negatives) {
      writer.write(`${rowToCsv(modelName, 0, pairFeatures(faces[i], faces[j]))}\n`);
    }
    totalPos += positives.length;
    totalNeg += negatives.length;
    modelsUsed++;
  }

  writer.end();
  // Report only after the stream has flushed, and surface write errors.
  await finished;

  const totalPairs = totalPos + totalNeg;
  const positivePct = totalPairs > 0 ? (totalPos / totalPairs * 100).toFixed(1) : '0.0';
  console.log(`Wrote ${OUT}`);
  console.log(`Models: ${modelsUsed} used, ${skipped} skipped`);
  console.log(`Pairs: ${totalPos} positive, ${totalNeg} negative (${positivePct}% positive)`);
  console.log(`Features: ${FEATURE_KEYS.length}`);
}

main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});