feat: AcroForm-Fill via Claude CLI, Multi-Source, Kanagawa, Docker-Deploy
Komplettes Rework der AI-Studio-Vorlage zu einem produktiven Werkzeug fuer
deutsche AcroForm-Formulare (Reha-Antraege, Arzt-Befundberichte):
- Backend: Express spawnt headless Claude CLI ('claude -p --output-format json'
via stdin-Pipe). Prompt enthaelt die Feldnamen als Ziel-Schema plus die
Arbeitsregeln (Stichwortstil, feste Zeichen-Kaestchen ohne Leerzeichen,
Vordrucke respektieren, keine geratenen Werte, nur medizinisch).
- PDF-Handling: pdfjs-dist statt pdf-lib — pdf-lib scheitert an verschluesselten
Object-Streams in DRV-Formularen. annotationStorage + saveDocument, kein
Flatten. Worker-Patch zur Laufzeit forciert Auto-Size und schwarze Schrift.
- Multi-Source-Upload: beliebig viele PDFs/Bilder + optional Freitext.
- Design: Kanagawa Design System (Preset aus ../kanagawa-design-system),
Tailwind lokal gebaut statt CDN, Dark/Light-Toggle, Progress-Indicator.
- Deployment: Multi-Stage-Dockerfile, docker-compose in matrix_default-Netz,
Claude-Credentials vom Host per Volume. PLAN.md + AGENTS.md (Alex-Schema).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d6cab4aeb5
commit
3c669fb003
28 changed files with 6756 additions and 934 deletions
48
services/api.ts
Normal file
48
services/api.ts
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
import type { FileData, FormResponse } from '../types';
|
||||
import type { PdfFieldInfo } from './pdfService';
|
||||
|
||||
/**
 * Uploads the target form, all source material and the AcroForm field list to
 * the backend endpoint `/api/process` and returns the parsed JSON response.
 *
 * @param formFile    The PDF form that should be filled.
 * @param sourceFiles Source documents to extract data from (may be empty when
 *                    `sourceText` is provided).
 * @param sourceText  Optional free text; only sent when it is not blank.
 * @param pdfFields   Field descriptors of the target form, sent as JSON.
 * @returns The response body of a successful request, typed as `FormResponse`.
 * @throws Error when the form has no fields, when neither source files nor
 *         text are given, or when the server answers with a non-2xx status.
 */
export async function processDocuments(
  formFile: FileData,
  sourceFiles: FileData[],
  sourceText: string,
  pdfFields: PdfFieldInfo[]
): Promise<FormResponse> {
  if (pdfFields.length === 0) {
    throw new Error(
      'Das Ziel-PDF enthält keine AcroForm-Felder. ' +
        'Nur Formulare mit interaktiven Feldern werden unterstützt.'
    );
  }

  const hasSourceText = sourceText.trim().length > 0;
  if (sourceFiles.length === 0 && !hasSourceText) {
    throw new Error('Mindestens ein Quelldokument oder Text wird benötigt.');
  }

  const body = new FormData();
  body.append('form', formFile.file, formFile.file.name);
  for (const source of sourceFiles) {
    body.append('sources', source.file, source.file.name);
  }
  if (hasSourceText) {
    body.append('sourceText', sourceText);
  }
  body.append('fields', JSON.stringify(pdfFields));

  const res = await fetch('/api/process', {
    method: 'POST',
    body,
  });

  if (!res.ok) {
    let message = `Server antwortete mit ${res.status}`;
    try {
      const payload: unknown = await res.json();
      if (typeof payload === 'object' && payload !== null) {
        const { error, details } = payload as { error?: unknown; details?: unknown };
        // Structured (non-string) values are serialised as JSON so the thrown
        // message never degrades to "[object Object]".
        const describe = (value: unknown): string =>
          typeof value === 'string' ? value : JSON.stringify(value);
        if (error) message = describe(error);
        if (details) message += ` — ${describe(details)}`;
      }
    } catch {
      // Error body was not valid JSON; keep the status-based message.
    }
    throw new Error(message);
  }

  return (await res.json()) as FormResponse;
}
|
||||
|
|
@ -1,155 +0,0 @@
|
|||
import { GoogleGenAI, Type, Schema } from "@google/genai";
|
||||
import { FileData, FormResponse } from "../types";
|
||||
import { PdfFieldInfo } from "./pdfService";
|
||||
|
||||
// Module-level Gemini client; the API key is read from process.env.API_KEY when this module is evaluated.
const ai = new GoogleGenAI({ apiKey: process.env.API_KEY });
|
||||
|
||||
// Visual placement of a value, used when the form has no named PDF fields.
const coordinatesSchema: Schema = {
  type: Type.OBJECT,
  description: "REQUIRED if no specific PDF field names are provided. Visual location to draw text.",
  properties: {
    pageIndex: { type: Type.INTEGER, description: "0-based page index" },
    x: { type: Type.INTEGER, description: "Horizontal position (0-1000) from Left" },
    y: { type: Type.INTEGER, description: "Vertical position (0-1000) from Top" }
  },
  required: ["pageIndex", "x", "y"]
};

// Per-field validation verdict reported by the model.
const validationSchema: Schema = {
  type: Type.OBJECT,
  properties: {
    status: {
      type: Type.STRING,
      description: "VALID, WARNING, or INVALID."
    },
    message: {
      type: Type.STRING,
      description: "Validation message explaining any issues."
    },
    suggestion: {
      type: Type.STRING,
      description: "Alternative value suggestion."
    }
  },
  required: ["status"]
};

// One extracted form field: target key/label, value, provenance, placement and validation.
const extractedFieldSchema: Schema = {
  type: Type.OBJECT,
  properties: {
    key: {
      type: Type.STRING,
      description: "The PDF field name (if available)."
    },
    label: {
      type: Type.STRING,
      description: "A human-readable label for the field."
    },
    value: {
      type: Type.STRING,
      description: "The value to fill. For checkboxes, use 'X' if true, otherwise leave empty."
    },
    sourceContext: {
      type: Type.STRING,
      description: "The exact snippet of text from the source document used to derive this value."
    },
    coordinates: coordinatesSchema,
    validation: validationSchema
  },
  required: ["label", "value", "validation"]
};

// Top-level structured-output schema: a summary plus the list of extracted fields.
const responseSchema: Schema = {
  type: Type.OBJECT,
  properties: {
    summary: {
      type: Type.STRING,
      description: "A brief summary of what document was processed."
    },
    fields: {
      type: Type.ARRAY,
      items: extractedFieldSchema
    }
  },
  required: ["fields", "summary"]
};
|
||||
|
||||
/**
 * Sends the blank form and one source document to the Gemini model and returns
 * the model's JSON answer parsed as a FormResponse.
 *
 * @param blankForm      Target form; its base64 data and MIME type are sent inline.
 * @param sourceDocument Source document; sent inline the same way.
 * @param pdfFields      Field descriptors of the form (defaults to an empty list).
 *                       A non-empty list selects the "fillable PDF" prompt section,
 *                       an empty list selects the coordinate-based "visual filling" section.
 * @returns The parsed response text.
 * @throws Error("No response from Gemini") when the response has no text; any other
 *         error is logged via console.error and rethrown unchanged.
 */
export const processDocuments = async (
|
||||
blankForm: FileData,
|
||||
sourceDocument: FileData,
|
||||
pdfFields: PdfFieldInfo[] = []
|
||||
): Promise<FormResponse> => {
|
||||
|
||||
// Both documents are passed to the model as inline base64 parts.
const formPart = {
|
||||
inlineData: {
|
||||
data: blankForm.base64,
|
||||
mimeType: blankForm.type,
|
||||
},
|
||||
};
|
||||
|
||||
const sourcePart = {
|
||||
inlineData: {
|
||||
data: sourceDocument.base64,
|
||||
mimeType: sourceDocument.type,
|
||||
},
|
||||
};
|
||||
|
||||
// Base system prompt; a mode-specific section is appended below.
let systemPrompt = `
|
||||
ROLE: Intelligent Document Processing AI (German Bureaucracy Expert).
|
||||
TASK: Extract data from the SOURCE DOCUMENT and map it to the TARGET FORM visually or logically.
|
||||
|
||||
STRICT FORMATTING RULES (German Context):
|
||||
1. DATES: Must be formatted as 'DD.MM.YYYY' (e.g., 24.01.1982). Do not use ISO or US formats.
|
||||
2. NUMBERS/CURRENCY: Use comma as decimal separator (e.g., 1.425,00). Do NOT write the currency symbol (€) if the form already has it printed.
|
||||
3. CHECKBOXES: If a condition is met (e.g., "Männlich", "Ja"), the 'value' must be "X". If not met, leave empty.
|
||||
|
||||
CRITICAL: Verify every extraction. If ambiguous, set validation.status to 'WARNING'.
|
||||
`;
|
||||
|
||||
// With known field names the model maps values to them; otherwise it must estimate coordinates.
if (pdfFields.length > 0) {
|
||||
const fieldList = pdfFields.map(f => `"${f.name}" (${f.type})`).join(", ");
|
||||
systemPrompt += `
|
||||
MODE: FILLABLE PDF (AcroForm).
|
||||
Map extracted data to these exact field IDs: [${fieldList}].
|
||||
`;
|
||||
} else {
|
||||
systemPrompt += `
|
||||
MODE: VISUAL FILLING (Flat Scan/Image).
|
||||
The target form has NO digital fields. You must estimate COORDINATES.
|
||||
|
||||
COORDINATE SYSTEM (0-1000):
|
||||
- x=0, y=0 is Top-Left.
|
||||
- x=1000, y=1000 is Bottom-Right.
|
||||
|
||||
STRATEGY:
|
||||
1. Analyze the blank form image. Identify where user input belongs (lines, boxes).
|
||||
2. For "Reisekosten" (Travel Expenses): Look for columns like "Fahrtkosten", "Übernachtung". accurately place the amounts in the "Betrag" column.
|
||||
3. Place text slightly ABOVE the underline so it looks natural.
|
||||
4. For Checkboxes: Estimate the center of the square box.
|
||||
`;
|
||||
}
|
||||
|
||||
try {
|
||||
const modelId = "gemini-3-flash-preview";
|
||||
|
||||
// Each document part is followed by a text part labelling it; JSON output is requested via responseSchema.
const response = await ai.models.generateContent({
|
||||
model: modelId,
|
||||
contents: {
|
||||
parts: [
|
||||
formPart,
|
||||
{ text: "TARGET FORM (Blank)" },
|
||||
sourcePart,
|
||||
{ text: "SOURCE DATA (Email/Receipts)" },
|
||||
]
|
||||
},
|
||||
config: {
|
||||
responseMimeType: "application/json",
|
||||
responseSchema: responseSchema,
|
||||
systemInstruction: systemPrompt
|
||||
}
|
||||
});
|
||||
|
||||
const text = response.text;
|
||||
if (!text) throw new Error("No response from Gemini");
|
||||
|
||||
// NOTE(review): the parsed JSON is cast to FormResponse without runtime validation.
return JSON.parse(text) as FormResponse;
|
||||
|
||||
} catch (error) {
|
||||
console.error("Gemini API Error:", error);
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
|
|
@ -1,105 +1,144 @@
|
|||
import { PDFDocument, PDFTextField, PDFCheckBox, StandardFonts, rgb } from 'pdf-lib';
|
||||
import { ExtractedField } from '../types';
|
||||
import * as pdfjs from 'pdfjs-dist';
|
||||
// @ts-expect-error - ?url-Import ist Vite-spezifisch, wird zur Build-Zeit aufgelöst
|
||||
import workerUrl from 'pdfjs-dist/build/pdf.worker.mjs?url';
|
||||
import type { ExtractedField } from '../types';
|
||||
|
||||
// pdfjs renders AcroForm text with the default appearance defined in the PDF
// (font size + colour). In government forms this is often 10–11pt and blue,
// which looks too large and wrongly coloured in the filled-in PDF.
//
// Workaround: the pdf.worker.mjs script is fetched at runtime, two statements
// are patched and the result is registered as worker source via a Blob URL:
//   1. fontSize → 0 ⇒ pdfjs switches to auto-size (adapts to the field height)
//   2. fontColor → black
// The rest of the PDF stays unchanged; only text values render smaller/black.
let workerPromise: Promise<void> | null = null;

/**
 * Configures `pdfjs.GlobalWorkerOptions.workerSrc` exactly once.
 *
 * The first call fetches the worker script, applies the source patches and
 * points pdfjs at a Blob URL of the patched script. Later calls return the
 * same promise. If fetching or patching throws, the unpatched `workerUrl` is
 * used instead, so the returned promise never rejects.
 *
 * A patch whose pattern is not found in the worker source is skipped and
 * reported with `console.warn`, so that a changed pdfjs-dist build does not
 * silently disable the patch.
 */
function ensureWorker(): Promise<void> {
  if (workerPromise) return workerPromise;

  workerPromise = (async () => {
    try {
      const res = await fetch(workerUrl as string);
      if (!res.ok) throw new Error(`worker fetch failed: ${res.status}`);
      let src = await res.text();

      const patches = [
        {
          // 1. Force auto-size.
          label: 'fontSize',
          pattern: /let\s*\{\s*fontSize\s*\}\s*=\s*this\.data\.defaultAppearanceData;/,
          replacement: 'let fontSize = 0; void this.data.defaultAppearanceData;',
        },
        {
          // 2. Force black font colour.
          label: 'fontColor',
          pattern: /const\s*\{\s*fontName,\s*fontColor\s*\}\s*=\s*this\.data\.defaultAppearanceData;/,
          replacement:
            'const { fontName } = this.data.defaultAppearanceData; const fontColor = new Uint8ClampedArray([0,0,0]);',
        },
      ];

      for (const { label, pattern, replacement } of patches) {
        if (!pattern.test(src)) {
          console.warn(
            `[pdfService] worker patch "${label}" did not match; the worker source may have changed`
          );
          continue;
        }
        src = src.replace(pattern, replacement);
      }

      const blob = new Blob([src], { type: 'text/javascript' });
      pdfjs.GlobalWorkerOptions.workerSrc = URL.createObjectURL(blob);
    } catch (e) {
      // Fallback: unpatched worker — better to keep running than to break the app.
      console.warn('[pdfService] worker patch failed, falling back', e);
      pdfjs.GlobalWorkerOptions.workerSrc = workerUrl as string;
    }
  })();

  return workerPromise;
}
|
||||
|
||||
/** Name and type of one form field of a PDF. */
export interface PdfFieldInfo {
  /** Field name. */
  name: string;
  /** Field type: 'Tx' | 'Btn' | 'Ch' | 'Sig'. */
  type: string;
}
|
||||
|
||||
export const getPdfFields = async (base64: string): Promise<PdfFieldInfo[]> => {
|
||||
/** One form widget annotation: its annotation id plus the field name and type it carries. */
interface WidgetInfo {
  /** Annotation id of the widget. */
  id: string;
  /** Name of the form field the widget belongs to. */
  fieldName: string;
  /** Field type reported by the annotation. */
  fieldType: string;
}
|
||||
|
||||
/**
 * Decodes a base64 string into its raw bytes.
 *
 * @param base64 Base64-encoded payload.
 * @returns A byte array with one entry per decoded character code.
 */
function base64ToUint8Array(base64: string): Uint8Array {
  const decoded = atob(base64);
  return Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
}
|
||||
|
||||
/**
 * Opens a PDF with pdfjs and gathers every named form widget on all pages.
 *
 * @param data Raw PDF bytes.
 * @returns The loaded document together with the widgets found, in page order.
 *          Annotations that are not of subtype 'Widget' or have no field name
 *          are left out.
 */
async function collectWidgets(
  data: Uint8Array
): Promise<{ doc: pdfjs.PDFDocumentProxy; widgets: WidgetInfo[] }> {
  await ensureWorker();
  const doc = await pdfjs.getDocument({ data, isEvalSupported: false }).promise;

  const widgets: WidgetInfo[] = [];
  let pageNumber = 1; // page numbers passed to getPage start at 1
  while (pageNumber <= doc.numPages) {
    const page = await doc.getPage(pageNumber);
    const annotations = await page.getAnnotations();
    const namedWidgets = annotations.filter(
      (entry) => entry.subtype === 'Widget' && entry.fieldName
    );
    for (const entry of namedWidgets) {
      widgets.push({
        id: entry.id,
        fieldName: entry.fieldName,
        fieldType: entry.fieldType,
      });
    }
    pageNumber += 1;
  }

  return { doc, widgets };
}
|
||||
|
||||
export async function getPdfFields(base64: string): Promise<PdfFieldInfo[]> {
|
||||
try {
|
||||
const pdfDoc = await PDFDocument.load(base64);
|
||||
const form = pdfDoc.getForm();
|
||||
const fields = form.getFields();
|
||||
return fields.map(f => ({
|
||||
name: f.getName(),
|
||||
type: f.constructor.name
|
||||
}));
|
||||
} catch (error) {
|
||||
console.warn("Failed to extract PDF fields", error);
|
||||
const data = base64ToUint8Array(base64);
|
||||
const { widgets } = await collectWidgets(data);
|
||||
const seen = new Set<string>();
|
||||
const unique: PdfFieldInfo[] = [];
|
||||
for (const w of widgets) {
|
||||
if (seen.has(w.fieldName)) continue;
|
||||
seen.add(w.fieldName);
|
||||
unique.push({ name: w.fieldName, type: w.fieldType });
|
||||
}
|
||||
return unique;
|
||||
} catch (e) {
|
||||
console.warn('[pdfService] getPdfFields failed:', e);
|
||||
return [];
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
export const createFilledPdf = async (base64: string, fields: ExtractedField[], isFillable: boolean): Promise<Uint8Array> => {
|
||||
const pdfDoc = await PDFDocument.load(base64);
|
||||
const pages = pdfDoc.getPages();
|
||||
const font = await pdfDoc.embedFont(StandardFonts.Helvetica);
|
||||
|
||||
if (isFillable) {
|
||||
try {
|
||||
const form = pdfDoc.getForm();
|
||||
const fieldMap: Record<string, string> = {};
|
||||
fields.forEach(f => {
|
||||
if (f.key) fieldMap[f.key] = f.value;
|
||||
});
|
||||
export async function createFilledPdf(
|
||||
base64: string,
|
||||
fields: ExtractedField[]
|
||||
): Promise<Uint8Array> {
|
||||
const data = base64ToUint8Array(base64);
|
||||
const { doc, widgets } = await collectWidgets(data);
|
||||
|
||||
for (const [key, value] of Object.entries(fieldMap)) {
|
||||
try {
|
||||
const field = form.getField(key);
|
||||
if (!field) continue;
|
||||
|
||||
if (field instanceof PDFTextField) {
|
||||
field.setText(String(value));
|
||||
} else if (field instanceof PDFCheckBox) {
|
||||
const isChecked = String(value).toLowerCase() === 'true' || String(value).toLowerCase() === 'yes' || String(value).toLowerCase() === 'x';
|
||||
if (isChecked) field.check();
|
||||
else field.uncheck();
|
||||
}
|
||||
} catch (e) {
|
||||
// Field might be read-only or tricky
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn("Error filling form fields", e);
|
||||
}
|
||||
} else {
|
||||
// VISUAL OVERLAY MODE
|
||||
// Iterate through fields and draw them at specific coordinates
|
||||
|
||||
for (const field of fields) {
|
||||
// Skip if no value or no coordinates
|
||||
if (!field.value || !field.coordinates) continue;
|
||||
|
||||
const { pageIndex, x, y } = field.coordinates;
|
||||
|
||||
// Safety check for page index and coordinates
|
||||
if (typeof pageIndex !== 'number' || pageIndex < 0 || pageIndex >= pages.length) continue;
|
||||
if (isNaN(x) || isNaN(y)) continue;
|
||||
|
||||
const page = pages[pageIndex];
|
||||
const { width, height } = page.getSize();
|
||||
|
||||
// Convert 0-1000 coordinates to PDF Point coordinates
|
||||
// PDF (0,0) is bottom-left.
|
||||
// API (0,0) is top-left.
|
||||
// x = (x / 1000) * width
|
||||
// y = height - (y / 1000) * height
|
||||
|
||||
const pdfX = (x / 1000) * width;
|
||||
const pdfY = height - (y / 1000) * height;
|
||||
|
||||
// Adjust slightly for font height (text is drawn from baseline)
|
||||
// A small nudge down (subtract from Y) helps align with lines usually.
|
||||
const adjustedY = pdfY - 4;
|
||||
|
||||
try {
|
||||
page.drawText(field.value, {
|
||||
x: pdfX,
|
||||
y: adjustedY,
|
||||
size: 10,
|
||||
font: font,
|
||||
color: rgb(0, 0, 0),
|
||||
});
|
||||
} catch (e) {
|
||||
console.warn(`Failed to draw field ${field.label}`, e);
|
||||
}
|
||||
}
|
||||
const byName = new Map<string, ExtractedField>();
|
||||
for (const f of fields) {
|
||||
if (f.key) byName.set(f.key, f);
|
||||
}
|
||||
|
||||
return await pdfDoc.save();
|
||||
};
|
||||
|
||||
export const fillPdf = async (base64: string, fieldValues: Record<string, string | boolean>): Promise<Uint8Array> => {
|
||||
return new Uint8Array();
|
||||
};
|
||||
const store = doc.annotationStorage;
|
||||
|
||||
for (const w of widgets) {
|
||||
const source = byName.get(w.fieldName);
|
||||
if (!source) continue;
|
||||
|
||||
if (w.fieldType === 'Tx') {
|
||||
store.setValue(w.id, { value: source.value ?? '' });
|
||||
} else if (w.fieldType === 'Btn') {
|
||||
const checked = isTruthyCheckbox(source.value);
|
||||
store.setValue(w.id, { value: checked });
|
||||
} else if (w.fieldType === 'Ch') {
|
||||
store.setValue(w.id, { value: source.value ?? '' });
|
||||
}
|
||||
// Sig (Signature) wird übersprungen.
|
||||
}
|
||||
|
||||
return await doc.saveDocument();
|
||||
}
|
||||
|
||||
/**
 * Decides whether a textual field value means "checked".
 *
 * The value is trimmed and lower-cased first; 'x', 'ja', 'yes', 'true' and '1'
 * count as checked, anything else (including a missing value) does not.
 */
function isTruthyCheckbox(value: string): boolean {
  const normalized = (value ?? '').trim().toLowerCase();
  return ['x', 'ja', 'yes', 'true', '1'].includes(normalized);
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue