From 20f63e07cd5c2487eb3a778b15bc28e21d2df66a Mon Sep 17 00:00:00 2001
From: Kenearos
Date: Tue, 12 May 2026 00:16:27 +0200
Subject: [PATCH] feat(image-import): add normalizeName, matchNames (exact+fuzzy), classify slot helper

---
 image-import.js |  85 +++++++++++++++++++++++++++++++++++++
 test-suite.js   | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 199 insertions(+)

diff --git a/image-import.js b/image-import.js
index 25dd8a8..4406760 100644
--- a/image-import.js
+++ b/image-import.js
@@ -203,6 +203,91 @@ class ImageImporter {
         }
         return dp[m][n];
     }
+
+    /**
+     * Normalize a name for comparison: lowercase, trim, collapse internal whitespace.
+     * No umlaut folding (per spec section 10.1).
+     * @param {string} name - Raw name; coerced with String() before normalizing.
+     * @returns {string} The normalized name.
+     */
+    normalizeName(name) {
+        return String(name).toLowerCase().trim().replace(/\s+/g, ' ');
+    }
+
+    /**
+     * For each extracted entry, try an exact match on the normalized name against
+     * the existing employees; otherwise suggest the nearest employee by Levenshtein
+     * distance when that distance is <= 2. Ties on distance go to the employee that
+     * comes first in default sort() order.
+     * Unknowns are de-duplicated by the raw (un-normalized) entry name.
+     * @param {Array<{name:string,date:string,share:number}>} extractedEntries
+     * @param {string[]} existingEmployees
+     * @returns {{ matched: Array<{entry:object, resolvedName:string}>, unknowns: Array<{candidate:string, suggested:string|null}> }}
+     */
+    matchNames(extractedEntries, existingEmployees) {
+        const normalizedMap = new Map();
+        for (const emp of existingEmployees) {
+            normalizedMap.set(this.normalizeName(emp), emp);
+        }
+        // Normalize every employee once up front instead of once per
+        // (entry, employee) pair inside the search loop.
+        const searchPool = [...existingEmployees]
+            .sort()
+            .map((emp) => ({ emp, norm: this.normalizeName(emp) }));
+
+        const matched = [];
+        const unknownsByCandidate = new Map();
+
+        for (const entry of extractedEntries) {
+            const normCandidate = this.normalizeName(entry.name);
+            if (normalizedMap.has(normCandidate)) {
+                matched.push({ entry, resolvedName: normalizedMap.get(normCandidate) });
+                continue;
+            }
+            // Only the first occurrence of a candidate is recorded, so skip the
+            // nearest-neighbour scan for repeats.
+            if (unknownsByCandidate.has(entry.name)) {
+                continue;
+            }
+
+            let best = null;
+            let bestDist = Infinity;
+            for (const { emp, norm } of searchPool) {
+                const d = this.levenshtein(normCandidate, norm);
+                // Strict < keeps the earliest employee in sort order on ties.
+                if (d < bestDist) {
+                    bestDist = d;
+                    best = emp;
+                }
+            }
+            const suggested = (best !== null && bestDist <= 2) ? best : null;
+            unknownsByCandidate.set(entry.name, { candidate: entry.name, suggested });
+        }
+
+        return { matched, unknowns: Array.from(unknownsByCandidate.values()) };
+    }
+
+    /**
+     * Slot classification, duplicated from Feature B per spec section 9.3 (independent feature).
+     * The day of the week is checked first, so Friday/Saturday/Sunday return before
+     * the holiday provider is consulted. Without a holiday provider every other day
+     * is 'weekday'.
+     * @param {Date} date
+     * @returns {'fr'|'sa'|'so'|'weekday'}
+     */
+    classify(date) {
+        const wd = date.getDay();
+        if (wd === 5) return 'fr';
+        if (wd === 6) return 'sa';
+        if (wd === 0) return 'so';
+        const isFeiertag = this.holidayProvider && this.holidayProvider.isHoliday(date);
+        const isTagVorFeiertag = this.holidayProvider && this.holidayProvider.isDayBeforeHoliday(date);
+        // Holiday that is also the day before another holiday -> 'sa'.
+        if (isFeiertag && isTagVorFeiertag) return 'sa';
+        if (isTagVorFeiertag) return 'fr';
+        if (isFeiertag) return 'so';
+        return 'weekday';
+    }
 }
 
 // Verbatim system prompt — German with Umlaute (per spec §7.3).
diff --git a/test-suite.js b/test-suite.js
index 492f63b..427af99 100644
--- a/test-suite.js
+++ b/test-suite.js
@@ -747,6 +747,120 @@ runner.test('Levenshtein: 2 Distanz', (t) => {
     t.assertEqual(importer.levenshtein('mueller', 'mueler'), 1, 'ein l weniger');
 });
 
+// ============================================================================
+// ImageImporter Tests - normalizeName, matchNames, classify (Feature A)
+// ============================================================================
+
+runner.test('Match: exakter Match (case + whitespace identisch)', (t) => {
+    const importer = new ImageImporter(null);
+    const r = importer.matchNames(
+        [{ name: 'Max Mustermann', date: '2025-11-22', share: 1.0 }],
+        ['Max Mustermann']
+    );
+    t.assertEqual(r.matched.length, 1, '1 zugeordnet');
+    t.assertEqual(r.matched[0].resolvedName, 'Max Mustermann', 'Direkt aufgeloest');
+    t.assertEqual(r.unknowns.length, 0, 'Keine Unknowns');
+});
+
+runner.test('Match: normalisierter Match (Whitespace + Case)', (t) => {
+    const importer = new ImageImporter(null);
+    const r = importer.matchNames(
+        [{ name: ' MAX mustermann ', date: '2025-11-22', share: 1.0 }],
+        ['Max Mustermann']
+    );
+    t.assertEqual(r.matched.length, 1, '1 zugeordnet');
+    t.assertEqual(r.matched[0].resolvedName, 'Max Mustermann', 'Normalisiert aufgeloest');
+});
+
+runner.test('Match: Fuzzy mit Distance 1', (t) => {
+    const importer = new ImageImporter(null);
+    const r = importer.matchNames(
+        [{ name: 'Max Mustermannn', date: '2025-11-22', share: 1.0 }],
+        ['Max Mustermann']
+    );
+    t.assertEqual(r.matched.length, 0, 'Nicht automatisch gematcht');
+    t.assertEqual(r.unknowns.length, 1, '1 Unknown');
+    t.assertEqual(r.unknowns[0].suggested, 'Max Mustermann', 'Vorschlag = naechster');
+    t.assertEqual(r.unknowns[0].candidate, 'Max Mustermannn', 'Original-Kandidat');
+});
+
+runner.test('Match: Distance > 2 ohne Vorschlag', (t) => {
+    const importer = new ImageImporter(null);
+    const r = importer.matchNames(
+        [{ name: 'Egon Olsen', date: '2025-11-22', share: 1.0 }],
+        ['Max Mustermann']
+    );
+    t.assertEqual(r.unknowns.length, 1, '1 Unknown');
+    t.assertEqual(r.unknowns[0].suggested, null, 'Kein Vorschlag');
+});
+
+runner.test('Match: leere Employee-Liste alle Unknowns', (t) => {
+    const importer = new ImageImporter(null);
+    const r = importer.matchNames(
+        [{ name: 'Max', date: '2025-11-22', share: 1.0 }],
+        []
+    );
+    t.assertEqual(r.unknowns.length, 1, 'Unknown');
+    t.assertEqual(r.unknowns[0].suggested, null, 'Kein Vorschlag moeglich');
+});
+
+runner.test('Match: mehrere Fuzzy-Treffer gleiche Distanz alphabetisch erster', (t) => {
+    const importer = new ImageImporter(null);
+    const r = importer.matchNames(
+        [{ name: 'Anne', date: '2025-11-22', share: 1.0 }],
+        ['Anna', 'Anni']
+    );
+    t.assertEqual(r.unknowns[0].suggested, 'Anna', 'Alphabetisch erster');
+});
+
+runner.test('Match: doppelter Unknown wird nur einmal gemeldet', (t) => {
+    const importer = new ImageImporter(null);
+    const r = importer.matchNames(
+        [
+            { name: 'Max Mustermannn', date: '2025-11-22', share: 1.0 },
+            { name: 'Max Mustermannn', date: '2025-11-23', share: 1.0 }
+        ],
+        ['Max Mustermann']
+    );
+    t.assertEqual(r.unknowns.length, 1, '1 Unknown trotz 2 Eintraegen');
+    t.assertEqual(r.unknowns[0].suggested, 'Max Mustermann', 'Vorschlag bleibt');
+});
+
+runner.test('Classify: Freitag = fr', (t) => {
+    const importer = new ImageImporter(null);
+    importer.holidayProvider = new HolidayProvider();
+    const fri = new Date('2025-11-21T12:00:00');
+    t.assertEqual(importer.classify(fri), 'fr', 'Freitag');
+});
+
+runner.test('Classify: Samstag = sa', (t) => {
+    const importer = new ImageImporter(null);
+    importer.holidayProvider = new HolidayProvider();
+    const sat = new Date('2025-11-22T12:00:00');
+    t.assertEqual(importer.classify(sat), 'sa', 'Samstag');
+});
+
+runner.test('Classify: Feiertag (Werktag) = so', (t) => {
+    const importer = new ImageImporter(null);
+    importer.holidayProvider = new HolidayProvider();
+    const may1 = new Date('2025-05-01T12:00:00');
+    t.assertEqual(importer.classify(may1), 'so', 'Feiertag = so');
+});
+
+runner.test('Classify: Tag vor Feiertag (Werktag) = fr', (t) => {
+    const importer = new ImageImporter(null);
+    importer.holidayProvider = new HolidayProvider();
+    const apr30 = new Date('2025-04-30T12:00:00');
+    t.assertEqual(importer.classify(apr30), 'fr', 'Tag vor Feiertag');
+});
+
+runner.test('Classify: Werktag = weekday', (t) => {
+    const importer = new ImageImporter(null);
+    importer.holidayProvider = new HolidayProvider();
+    const mon = new Date('2025-11-24T12:00:00');
+    t.assertEqual(importer.classify(mon), 'weekday', 'Werktag');
+});
+
 // ============================================================================
 // Display Functions
 // ============================================================================