feat: Add LaTeX template-based form filling for G2210-11
Replace unreliable visual overlay mode with precise LaTeX templates: - Add LaTeX template for G2210-11 (Ärztlicher Befundbericht der WAG) - Create Python Flask backend for LaTeX compilation (latex_service.py, server.py) - Add frontend latexService.ts for API communication - Update ReviewPanel with LaTeX mode toggle and preview - Enhance Gemini prompts with G2210-11 specific field extraction - Add Dockerfile with TeX Live for Railway deployment - Update railway.toml to use Docker builder The LaTeX approach ensures accurate field placement and proper formatting for German medical/insurance forms. https://claude.ai/code/session_016pQhdznHZ74Fpkvwr3cLBq
This commit is contained in:
parent
adf8f0240e
commit
19e96ef59b
10 changed files with 1557 additions and 32 deletions
|
|
@ -2,6 +2,7 @@ import { GoogleGenAI, Type, Schema } from "@google/genai";
|
|||
import { FileData, FormResponse } from "../types";
|
||||
import { PdfFieldInfo } from "./pdfService";
|
||||
import { getApiKey } from "./apiKeyService";
|
||||
import { detectTemplate, getExpectedFields } from "./latexService";
|
||||
|
||||
const getAI = () => {
|
||||
const apiKey = getApiKey();
|
||||
|
|
@ -75,12 +76,98 @@ const responseSchema: Schema = {
|
|||
required: ["fields", "summary"]
|
||||
};
|
||||
|
||||
// Prompt fragment listing the field labels to extract for form G2210-11
// (Ärztlicher Befundbericht). It is interpolated verbatim into the system
// prompt that processDocuments builds when the G2210-11 template is detected,
// and that prompt tells the model to use these label names exactly, so any
// edit to the text below changes the labels the model is asked to return.
|
||||
const G2210_FIELDS = `
|
||||
REQUIRED FIELDS FOR G2210-11 (Ärztlicher Befundbericht):
|
||||
Extract ALL of the following fields from the source document:
|
||||
|
||||
PATIENT DATA:
|
||||
- Versicherungsnummer (e.g., "12 345678 A 123")
|
||||
- ABT.-Nr. (Aktenzeichen/Abteilungsnummer)
|
||||
- Name, Vorname (Full name: "Nachname, Vorname")
|
||||
- Geburtsdatum (format: DD.MM.YYYY)
|
||||
- Geschlecht (männlich/weiblich/divers)
|
||||
- Straße, Hausnummer
|
||||
- PLZ
|
||||
- Ort
|
||||
- Telefon
|
||||
- Krankenkasse
|
||||
|
||||
EMPLOYMENT:
|
||||
- Derzeitige Tätigkeit (Beruf)
|
||||
- Arbeitgeber
|
||||
- Arbeitsunfähig seit (date: DD.MM.YYYY)
|
||||
- Letzte Arbeitsaufnahme
|
||||
|
||||
DIAGNOSES (up to 6, with ICD-10 codes):
|
||||
- Diagnose 1 + Diagnose 1 ICD
|
||||
- Diagnose 2 + Diagnose 2 ICD
|
||||
- Diagnose 3 + Diagnose 3 ICD
|
||||
- Diagnose 4 + Diagnose 4 ICD
|
||||
- Diagnose 5 + Diagnose 5 ICD
|
||||
- Diagnose 6 + Diagnose 6 ICD
|
||||
|
||||
ANAMNESIS:
|
||||
- Anamnese/Beschwerden (patient symptoms and history)
|
||||
- Krankheitsverlauf (disease progression, previous treatments)
|
||||
- Körperlicher Befund (physical examination findings)
|
||||
|
||||
FUNCTIONAL LIMITATIONS (mark as "keine", "gering", or "erheblich"):
|
||||
- Mobilität keine/gering/erheblich
|
||||
- Selbstversorgung keine/gering/erheblich
|
||||
- Haushaltsführung keine/gering/erheblich
|
||||
- Erwerbstätigkeit keine/gering/erheblich
|
||||
- Kommunikation keine/gering/erheblich
|
||||
- Psychische Belastbarkeit keine/gering/erheblich
|
||||
- Beeinträchtigungen Erläuterung
|
||||
|
||||
MEDICATION (up to 5):
|
||||
- Medikament 1 + Medikament 1 Dosis + Medikament 1 Seit
|
||||
- Medikament 2 + Medikament 2 Dosis + Medikament 2 Seit
|
||||
- Medikament 3 + Medikament 3 Dosis + Medikament 3 Seit
|
||||
- Medikament 4 + Medikament 4 Dosis + Medikament 4 Seit
|
||||
- Medikament 5 + Medikament 5 Dosis + Medikament 5 Seit
|
||||
- Physikalische Therapie
|
||||
|
||||
PREVIOUS REHABILITATION:
|
||||
- Reha 1 Zeitraum + Reha 1 Einrichtung + Reha 1 Erfolg
|
||||
- Reha 2 Zeitraum + Reha 2 Einrichtung + Reha 2 Erfolg
|
||||
|
||||
ASSESSMENT:
|
||||
- Leistungsvermögen (vollschichtig/3-6 Stunden/unter 3 Stunden)
|
||||
- Rehabilitationsbedürftigkeit (reasoning for rehab need)
|
||||
- Rehabilitationsziel
|
||||
- Rehabilitationsform (stationär/ambulant/ganztägig ambulant)
|
||||
- Reha Einrichtung Empfehlung
|
||||
|
||||
TRAVEL:
|
||||
- Reisefähig (ja/nein)
|
||||
- Reisefähig Begründung (if no)
|
||||
- Begleitperson (ja/nein)
|
||||
|
||||
ADDITIONAL:
|
||||
- Ergänzende Angaben
|
||||
|
||||
DOCTOR INFORMATION:
|
||||
- Arzt Name
|
||||
- Facharztbezeichnung
|
||||
- Praxis Anschrift
|
||||
- Praxis Telefon
|
||||
- BSNR
|
||||
- LANR
|
||||
- Unterschrift Datum
|
||||
`;
|
||||
|
||||
export const processDocuments = async (
|
||||
blankForm: FileData,
|
||||
sourceDocument: FileData,
|
||||
pdfFields: PdfFieldInfo[] = []
|
||||
): Promise<FormResponse> => {
|
||||
|
||||
|
||||
// Detect if we have a known template
|
||||
const detectedTemplate = detectTemplate(blankForm.file?.name || '');
|
||||
const expectedFields = detectedTemplate ? getExpectedFields(detectedTemplate) : [];
|
||||
|
||||
const formPart = {
|
||||
inlineData: {
|
||||
data: blankForm.base64,
|
||||
|
|
@ -98,11 +185,34 @@ export const processDocuments = async (
|
|||
let systemPrompt = `
|
||||
ROLE: Intelligent Document Processing AI (Verification Expert).
|
||||
TASK: Extract data from the SOURCE DOCUMENT and map it to the BLANK TARGET FORM.
|
||||
|
||||
|
||||
CRITICAL INSTRUCTION: You must verify every extraction. If a value is ambiguous, plausibility is low, or you are guessing, set validation.status to 'WARNING' and explain why in validation.message.
|
||||
`;
|
||||
|
||||
if (pdfFields.length > 0) {
|
||||
// Add template-specific instructions
|
||||
if (detectedTemplate === 'G2210-11') {
|
||||
systemPrompt += `
|
||||
DETECTED FORM: G2210-11 (Ärztlicher Befundbericht der DRV Westfalen)
|
||||
|
||||
${G2210_FIELDS}
|
||||
|
||||
IMPORTANT INSTRUCTIONS:
|
||||
1. Extract ALL fields listed above, even if they are empty in the source.
|
||||
2. Use the EXACT label names as listed above for each field.
|
||||
3. For multi-value fields like diagnoses and medications, create separate field entries.
|
||||
4. For checkbox fields (Mobilität, Selbstversorgung, etc.), return separate fields for each option.
|
||||
Example: If mobility is "erheblich", return:
|
||||
- "Mobilität keine" with value ""
|
||||
- "Mobilität gering" with value ""
|
||||
- "Mobilität erheblich" with value "true"
|
||||
5. ICD-10 codes must be in standard format (e.g., "M54.5", "F32.1")
|
||||
6. Dates must be in DD.MM.YYYY format.
|
||||
7. For Leistungsvermögen, return separate checkbox fields:
|
||||
- "Leistungsvermögen vollschichtig" (true/false)
|
||||
- "Leistungsvermögen 3-6 Stunden" (true/false)
|
||||
- "Leistungsvermögen unter 3 Stunden" (true/false)
|
||||
`;
|
||||
} else if (pdfFields.length > 0) {
|
||||
const fieldList = pdfFields.map(f => `"${f.name}" (${f.type})`).join(", ");
|
||||
systemPrompt += `
|
||||
MODE: FILLABLE PDF (AcroForm).
|
||||
|
|
@ -110,12 +220,18 @@ export const processDocuments = async (
|
|||
Map extracted data to these exact field IDs: [${fieldList}].
|
||||
Return the 'key' property matching the field ID.
|
||||
`;
|
||||
} else if (expectedFields.length > 0) {
|
||||
systemPrompt += `
|
||||
MODE: TEMPLATE-BASED EXTRACTION.
|
||||
Extract the following specific fields: [${expectedFields.join(", ")}].
|
||||
Use these exact label names in your response.
|
||||
`;
|
||||
} else {
|
||||
systemPrompt += `
|
||||
MODE: VISUAL FILLING (Flat/XFA/Scan).
|
||||
The target form DOES NOT have accessible digital fields.
|
||||
You must VISUALLY locate where the text should be written.
|
||||
|
||||
|
||||
For every field you identify on the TARGET FORM:
|
||||
1. Extract the corresponding value from the SOURCE DOCUMENT.
|
||||
2. Estimate the VISUAL COORDINATES [pageIndex, x, y] where the text should start.
|
||||
|
|
@ -123,7 +239,7 @@ export const processDocuments = async (
|
|||
- (0,0) is the top-left corner of the page.
|
||||
- (1000,1000) is the bottom-right corner.
|
||||
- Align text slightly above lines or inside boxes.
|
||||
|
||||
|
||||
For checkboxes: If true/yes, the value should be "X" placed inside the box.
|
||||
`;
|
||||
}
|
||||
|
|
|
|||
251
services/latexService.ts
Normal file
251
services/latexService.ts
Normal file
|
|
@ -0,0 +1,251 @@
|
|||
/**
|
||||
* LaTeX Form Generation Service
|
||||
*
|
||||
* This service communicates with the Python LaTeX backend to generate
|
||||
* filled PDF forms using LaTeX templates.
|
||||
*/
|
||||
|
||||
import { ExtractedField } from '../types';
|
||||
|
||||
// Base URL of the LaTeX backend. Configurable through the VITE_LATEX_API_URL
// environment variable; falls back to the local development server.
// Trailing slashes are stripped because every request in this module is built
// as `${API_BASE_URL}/api/...`; a configured value such as
// "https://host/" would otherwise produce "https://host//api/...".
const API_BASE_URL = String(
  import.meta.env.VITE_LATEX_API_URL || 'http://localhost:5000'
).replace(/\/+$/, '');
|
||||
|
||||
/**
 * Outcome of a PDF generation request, as produced by generateLatexPdf.
 * That function sets `success: true` together with `pdf`/`mappedFields` when
 * the backend call succeeds, and `success: false` together with `error` when
 * the call fails or throws.
 */
export interface LatexGenerationResult {
|
||||
success: boolean; // whether the generation request completed without error
|
||||
pdf?: string; // base64 encoded PDF
|
||||
mappedFields?: Record<string, string>; // copied from the backend response's "mapped_fields" property
|
||||
error?: string; // error message describing why generation failed
|
||||
}
|
||||
|
||||
/**
 * Name of a template together with a list of field names.
 * NOTE(review): not referenced anywhere in the visible part of this file —
 * presumably intended to describe a backend template; confirm before relying on it.
 */
export interface TemplateInfo {
|
||||
name: string;
|
||||
fields: string[];
|
||||
}
|
||||
|
||||
/**
 * Probe the LaTeX backend's health endpoint.
 *
 * @returns `true` when `GET /api/health` answers with an OK status within
 * three seconds; `false` on a non-OK status, a timeout, or any other failure.
 * This function never rejects.
 */
export const isLatexServiceAvailable = async (): Promise<boolean> => {
  let reachable = false;
  try {
    // The abort signal is created inside the try block so that a failure to
    // create it is reported as "unavailable" too.
    const probe = await fetch(`${API_BASE_URL}/api/health`, {
      method: 'GET',
      signal: AbortSignal.timeout(3000),
    });
    reachable = probe.ok;
  } catch {
    reachable = false;
  }
  return reachable;
};
|
||||
|
||||
/**
 * Fetch the names of the LaTeX templates the backend offers.
 *
 * The response body is untrusted JSON, so the `templates` property is only
 * accepted when it really is an array, and non-string entries are dropped.
 *
 * @returns The template names, or an empty array when the request fails, the
 * backend answers with a non-OK status, or the payload has an unexpected shape.
 * This function never rejects.
 */
export const getAvailableTemplates = async (): Promise<string[]> => {
  try {
    const response = await fetch(`${API_BASE_URL}/api/templates`);
    if (!response.ok) {
      throw new Error('Failed to fetch templates');
    }
    const data: unknown = await response.json();
    const templates = (data as { templates?: unknown } | null)?.templates;
    if (!Array.isArray(templates)) {
      return [];
    }
    return templates.filter((entry): entry is string => typeof entry === 'string');
  } catch (error) {
    console.warn('Could not fetch templates:', error);
    return [];
  }
};
|
||||
|
||||
/**
 * Fetch the field mapping the backend defines for one template.
 *
 * @param templateName Template identifier; it is percent-encoded before being
 * placed in the URL path so that characters such as "/", "?", "#" or spaces
 * cannot change which endpoint is requested.
 * @returns The `mapping` object from the response, or `null` when the backend
 * answers with a non-OK status, the payload has no mapping, or the request fails.
 * This function never rejects.
 */
export const getTemplateFieldMapping = async (templateName: string): Promise<Record<string, string[]> | null> => {
  try {
    const response = await fetch(
      `${API_BASE_URL}/api/field-mapping/${encodeURIComponent(templateName)}`
    );
    if (!response.ok) {
      return null;
    }
    const data = await response.json();
    // Optional chaining: a literal `null` JSON body simply means "no mapping".
    return data?.mapping || null;
  } catch (error) {
    console.warn('Could not fetch field mapping:', error);
    return null;
  }
};
|
||||
|
||||
/**
 * Ask the backend to fill a LaTeX template and compile it to a PDF.
 *
 * @param templateName Name of the template to fill.
 * @param fields Extracted fields; only `label`, `value` and `key` of each
 * entry are sent to the backend.
 * @returns On success, `success: true` with the base64 encoded PDF and the
 * backend's `mapped_fields`. On any failure (network error, non-OK status,
 * or an OK response that carries no PDF) `success: false` with an error
 * message. This function never rejects.
 */
export const generateLatexPdf = async (
  templateName: string,
  fields: ExtractedField[]
): Promise<LatexGenerationResult> => {
  try {
    const response = await fetch(`${API_BASE_URL}/api/generate`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        template: templateName,
        fields: fields.map(f => ({
          label: f.label,
          value: f.value,
          key: f.key,
        })),
        format: 'base64',
      }),
    });

    if (!response.ok) {
      // The error body is optional; fall back to the status code when it is
      // missing, not JSON, or a literal `null`.
      const errorData = await response.json().catch(() => ({}));
      throw new Error(errorData?.error || `HTTP ${response.status}`);
    }

    const data = await response.json();
    // Do not report success without a usable payload: callers decode `pdf`
    // right away, and a missing value would only fail later and less clearly.
    if (typeof data?.pdf !== 'string' || data.pdf.length === 0) {
      throw new Error('Backend response did not contain a PDF');
    }

    return {
      success: true,
      pdf: data.pdf,
      mappedFields: data.mapped_fields,
    };
  } catch (error) {
    console.error('LaTeX PDF generation failed:', error);
    return {
      success: false,
      error: error instanceof Error ? error.message : 'Unknown error',
    };
  }
};
|
||||
|
||||
/**
 * Request the filled-in LaTeX source for a template without compiling it
 * (intended for debugging).
 *
 * @param templateName Name of the template to fill.
 * @param fields Extracted fields; only `label`, `value` and `key` of each
 * entry are sent to the backend.
 * @returns `{ latex }` with the source returned by the backend, or
 * `{ error }` with a message when the request fails. This function never rejects.
 */
export const previewLatexSource = async (
  templateName: string,
  fields: ExtractedField[]
): Promise<{ latex?: string; error?: string }> => {
  try {
    const payload = {
      template: templateName,
      fields: fields.map(field => ({
        label: field.label,
        value: field.value,
        key: field.key,
      })),
    };

    const response = await fetch(`${API_BASE_URL}/api/preview`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    });

    if (response.ok) {
      const data = await response.json();
      return { latex: data.latex };
    }

    // Non-OK status: prefer the backend's own error text, else the status code.
    const failure = await response.json().catch(() => ({}));
    throw new Error(failure.error || `HTTP ${response.status}`);
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error';
    return { error: message };
  }
};
|
||||
|
||||
/**
 * Decode a base64 string into a Blob (for preview or download).
 *
 * Note that this returns the Blob itself, not an object URL; callers that
 * need a URL must create one from the returned Blob.
 *
 * @param base64 Base64 encoded payload.
 * @param mimeType MIME type assigned to the Blob; defaults to a PDF.
 * @returns A Blob holding the decoded bytes.
 * @throws Whatever `atob` throws when `base64` is not valid base64.
 */
export const base64ToBlob = (base64: string, mimeType: string = 'application/pdf'): Blob => {
  const binary = atob(base64);
  // Write straight into the typed array; `atob` yields one char code per byte,
  // so no intermediate number[] copy is needed.
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) {
    bytes[i] = binary.charCodeAt(i);
  }
  return new Blob([bytes], { type: mimeType });
};
|
||||
|
||||
/**
 * Guess which LaTeX template matches a form, based on its file name.
 *
 * The name is normalized to Unicode NFC before matching. Some platforms hand
 * out decomposed file names ("A" followed by a combining diaeresis instead of
 * a precomposed "Ä"), which would otherwise never match the umlaut marker.
 *
 * @param fileName File name of the blank form (may be empty).
 * @returns The template identifier, or `null` when no known pattern matches.
 */
export const detectTemplate = (fileName: string): string | null => {
  const normalizedName = fileName.normalize('NFC').toLowerCase();

  // G2210-11 Ärztlicher Befundbericht
  // The last marker is 'ärztlicher', written with an escape so that it is
  // guaranteed to be the precomposed (NFC) form regardless of file encoding.
  const g2210Markers = ['g2210', 'befundbericht', 'aerztlicher', '\u00e4rztlicher'];
  if (g2210Markers.some(marker => normalizedName.includes(marker))) {
    return 'G2210-11';
  }

  // Add more template detection patterns here
  // if (normalizedName.includes('s0051')) return 'S0051';

  return null;
};
|
||||
|
||||
/**
 * List the field labels expected for a known form type, so the AI extraction
 * knows which fields to look for.
 *
 * The lookup uses a Map rather than a plain object, so names that happen to
 * exist on Object.prototype ("constructor", "toString", "__proto__", ...)
 * are treated like any other unknown template.
 *
 * @param templateName Template identifier, e.g. "G2210-11".
 * @returns A fresh array of field labels, or an empty array for unknown templates.
 */
export const getExpectedFields = (templateName: string): string[] => {
  const fieldsByTemplate = new Map<string, string[]>([
    [
      'G2210-11',
      [
        'Versicherungsnummer',
        'ABT.-Nr.',
        'Name, Vorname',
        'Geburtsdatum',
        'Geschlecht',
        'Straße, Hausnummer',
        'PLZ',
        'Ort',
        'Telefon',
        'Krankenkasse',
        'Derzeitige Tätigkeit',
        'Arbeitgeber',
        'Arbeitsunfähig seit',
        'Diagnose 1',
        'Diagnose 1 ICD',
        'Diagnose 2',
        'Diagnose 2 ICD',
        'Diagnose 3',
        'Diagnose 3 ICD',
        'Diagnose 4',
        'Diagnose 4 ICD',
        'Diagnose 5',
        'Diagnose 5 ICD',
        'Diagnose 6',
        'Diagnose 6 ICD',
        'Anamnese/Beschwerden',
        'Krankheitsverlauf',
        'Körperlicher Befund',
        'Mobilität (keine/gering/erheblich)',
        'Selbstversorgung (keine/gering/erheblich)',
        'Haushaltsführung (keine/gering/erheblich)',
        'Erwerbstätigkeit (keine/gering/erheblich)',
        'Medikament 1',
        'Medikament 1 Dosis',
        'Medikament 2',
        'Medikament 2 Dosis',
        'Medikament 3',
        'Medikament 3 Dosis',
        'Physikalische Therapie',
        'Frühere Reha Zeitraum',
        'Frühere Reha Einrichtung',
        'Leistungsvermögen',
        'Rehabilitationsbedürftigkeit',
        'Rehabilitationsziel',
        'Rehabilitationsform (stationär/ambulant)',
        'Reisefähig (ja/nein)',
        'Begleitperson erforderlich (ja/nein)',
        'Ergänzende Angaben',
        'Arzt Name',
        'Facharztbezeichnung',
        'Praxis Anschrift',
        'Praxis Telefon',
        'BSNR',
        'LANR',
      ],
    ],
  ]);

  return fieldsByTemplate.get(templateName) ?? [];
};
|
||||
Loading…
Add table
Add a link
Reference in a new issue