feat(image-import): add normalizeName, matchNames (exact+fuzzy), classify slot helper

This commit is contained in:
Kenearos 2026-05-12 00:16:27 +02:00
parent 773c286236
commit 20f63e07cd
2 changed files with 172 additions and 0 deletions

View file

@ -203,6 +203,77 @@ class ImageImporter {
}
return dp[m][n];
}
/**
* Normalize: lowercase, trim, collapse internal whitespace.
* No umlaut folding (per spec section 10.1).
* @param {string} name
* @returns {string}
*/
normalizeName(name) {
return String(name).toLowerCase().trim().replace(/\s+/g, ' ');
}
/**
* For each extracted entry, try exact-normalized match against existing employees;
* else compute Levenshtein nearest with distance <= 2.
* @param {Array<{name:string,date:string,share:number}>} extractedEntries
* @param {string[]} existingEmployees
* @returns {{ matched: Array<{entry:object, resolvedName:string}>, unknowns: Array<{candidate:string, suggested:string|null}> }}
*/
matchNames(extractedEntries, existingEmployees) {
const normalizedMap = new Map();
for (const emp of existingEmployees) {
normalizedMap.set(this.normalizeName(emp), emp);
}
const sortedEmployees = [...existingEmployees].sort();
const matched = [];
const unknownsByCandidate = new Map();
for (const entry of extractedEntries) {
const normCandidate = this.normalizeName(entry.name);
if (normalizedMap.has(normCandidate)) {
matched.push({ entry, resolvedName: normalizedMap.get(normCandidate) });
continue;
}
let best = null;
let bestDist = Infinity;
for (const emp of sortedEmployees) {
const d = this.levenshtein(normCandidate, this.normalizeName(emp));
if (d < bestDist) {
bestDist = d;
best = emp;
}
}
const suggested = (best !== null && bestDist <= 2) ? best : null;
if (!unknownsByCandidate.has(entry.name)) {
unknownsByCandidate.set(entry.name, { candidate: entry.name, suggested });
}
}
return { matched, unknowns: Array.from(unknownsByCandidate.values()) };
}
/**
* Slot classification, duplicated from Feature B per spec section 9.3 (independent feature).
* @param {Date} date
* @returns {'fr'|'sa'|'so'|'weekday'}
*/
classify(date) {
const wd = date.getDay();
if (wd === 5) return 'fr';
if (wd === 6) return 'sa';
if (wd === 0) return 'so';
const isFeiertag = this.holidayProvider && this.holidayProvider.isHoliday(date);
const isTagVorFeiertag = this.holidayProvider && this.holidayProvider.isDayBeforeHoliday(date);
if (isFeiertag && isTagVorFeiertag) return 'sa';
if (isTagVorFeiertag) return 'fr';
if (isFeiertag) return 'so';
return 'weekday';
}
}
// Verbatim system prompt — German with Umlaute (per spec §7.3).

View file

@ -747,6 +747,107 @@ runner.test('Levenshtein: 2 Distanz', (t) => {
t.assertEqual(importer.levenshtein('mueller', 'mueler'), 1, 'ein l weniger');
});
// ============================================================================
// ImageImporter Tests - normalizeName, matchNames, classify (Feature A)
// ============================================================================
// Exact match: identical spelling resolves directly, nothing is left unknown.
runner.test('Match: exakter Match (case + whitespace identisch)', (t) => {
    const importer = new ImageImporter(null);
    const extracted = [{ name: 'Max Mustermann', date: '2025-11-22', share: 1.0 }];
    const employees = ['Max Mustermann'];
    const { matched, unknowns } = importer.matchNames(extracted, employees);
    t.assertEqual(matched.length, 1, '1 zugeordnet');
    t.assertEqual(matched[0].resolvedName, 'Max Mustermann', 'Direkt aufgeloest');
    t.assertEqual(unknowns.length, 0, 'Keine Unknowns');
});
// Normalized match: case and surrounding whitespace differences are ignored.
runner.test('Match: normalisierter Match (Whitespace + Case)', (t) => {
    const importer = new ImageImporter(null);
    const extracted = [{ name: ' MAX mustermann ', date: '2025-11-22', share: 1.0 }];
    const employees = ['Max Mustermann'];
    const { matched } = importer.matchNames(extracted, employees);
    t.assertEqual(matched.length, 1, '1 zugeordnet');
    t.assertEqual(matched[0].resolvedName, 'Max Mustermann', 'Normalisiert aufgeloest');
});
// Fuzzy hit within the distance limit: reported as unknown with a suggestion, never auto-matched.
runner.test('Match: Fuzzy mit Distance 1', (t) => {
    const importer = new ImageImporter(null);
    const extracted = [{ name: 'Max Mustermannn', date: '2025-11-22', share: 1.0 }];
    const employees = ['Max Mustermann'];
    const { matched, unknowns } = importer.matchNames(extracted, employees);
    t.assertEqual(matched.length, 0, 'Nicht automatisch gematcht');
    t.assertEqual(unknowns.length, 1, '1 Unknown');
    const [first] = unknowns;
    t.assertEqual(first.suggested, 'Max Mustermann', 'Vorschlag = naechster');
    t.assertEqual(first.candidate, 'Max Mustermannn', 'Original-Kandidat');
});
// Too far from every employee: unknown without a suggestion.
runner.test('Match: Distance > 2 ohne Vorschlag', (t) => {
    const importer = new ImageImporter(null);
    const extracted = [{ name: 'Egon Olsen', date: '2025-11-22', share: 1.0 }];
    const employees = ['Max Mustermann'];
    const { unknowns } = importer.matchNames(extracted, employees);
    t.assertEqual(unknowns.length, 1, '1 Unknown');
    t.assertEqual(unknowns[0].suggested, null, 'Kein Vorschlag');
});
// No employees at all: every entry is unknown and nothing can be suggested.
runner.test('Match: leere Employee-Liste alle Unknowns', (t) => {
    const importer = new ImageImporter(null);
    const extracted = [{ name: 'Max', date: '2025-11-22', share: 1.0 }];
    const employees = [];
    const { unknowns } = importer.matchNames(extracted, employees);
    t.assertEqual(unknowns.length, 1, 'Unknown');
    t.assertEqual(unknowns[0].suggested, null, 'Kein Vorschlag moeglich');
});
// Tie between equally distant employees: the alphabetically first one is suggested.
runner.test('Match: mehrere Fuzzy-Treffer gleiche Distanz alphabetisch erster', (t) => {
    const importer = new ImageImporter(null);
    const extracted = [{ name: 'Anne', date: '2025-11-22', share: 1.0 }];
    const employees = ['Anna', 'Anni'];
    const { unknowns } = importer.matchNames(extracted, employees);
    t.assertEqual(unknowns[0].suggested, 'Anna', 'Alphabetisch erster');
});
// Table-driven classify() tests: [test name, local ISO timestamp, expected slot, assertion message].
// Noon timestamps are used for every case; each test gets a fresh importer and HolidayProvider.
for (const [title, isoTimestamp, expectedSlot, message] of [
    ['Classify: Freitag = fr', '2025-11-21T12:00:00', 'fr', 'Freitag'],
    ['Classify: Samstag = sa', '2025-11-22T12:00:00', 'sa', 'Samstag'],
    ['Classify: Feiertag (Werktag) = so', '2025-05-01T12:00:00', 'so', 'Feiertag = so'],
    ['Classify: Tag vor Feiertag (Werktag) = fr', '2025-04-30T12:00:00', 'fr', 'Tag vor Feiertag'],
    ['Classify: Werktag = weekday', '2025-11-24T12:00:00', 'weekday', 'Werktag'],
]) {
    runner.test(title, (t) => {
        const importer = new ImageImporter(null);
        importer.holidayProvider = new HolidayProvider();
        const day = new Date(isoTimestamp);
        t.assertEqual(importer.classify(day), expectedSlot, message);
    });
}
// ============================================================================
// Display Functions
// ============================================================================