Rentenversicherer/latex_service.py
Claude 19e96ef59b
feat: Add LaTeX template-based form filling for G2210-11
Replace unreliable visual overlay mode with precise LaTeX templates:

- Add LaTeX template for G2210-11 (Ärztlicher Befundbericht der WAG)
- Create Python Flask backend for LaTeX compilation (latex_service.py, server.py)
- Add frontend latexService.ts for API communication
- Update ReviewPanel with LaTeX mode toggle and preview
- Enhance Gemini prompts with G2210-11 specific field extraction
- Add Dockerfile with TeX Live for Railway deployment
- Update railway.toml to use Docker builder

The LaTeX approach ensures accurate field placement and proper
formatting for German medical/insurance forms.

https://claude.ai/code/session_016pQhdznHZ74Fpkvwr3cLBq
2026-01-29 18:29:22 +00:00

249 lines
7.4 KiB
Python

#!/usr/bin/env python3
"""
LaTeX Form Generation Service
This service generates filled PDF forms using LaTeX templates.
It takes extracted field data and compiles a LaTeX template into a PDF.
"""
import json
import os
import subprocess
import tempfile
import base64
import sys
import shutil
from pathlib import Path
from typing import Dict, Any, Optional
# Template directory
TEMPLATE_DIR = Path(__file__).parent / "templates"
# Maps each LaTeX-special character to its escaped form. Used for a single
# pass over the input so that replacement text is never escaped a second time.
_LATEX_SPECIAL_CHARS = {
    '\\': r'\textbackslash{}',
    '&': r'\&',
    '%': r'\%',
    '$': r'\$',
    '#': r'\#',
    '_': r'\_',
    '{': r'\{',
    '}': r'\}',
    '~': r'\textasciitilde{}',
    '^': r'\textasciicircum{}',
}


def escape_latex(text: str) -> str:
    """Escape special LaTeX characters in text.

    Every character is translated exactly once. Chained ``str.replace``
    calls would first turn a backslash into ``\\textbackslash{}`` and then
    escape the braces of that replacement, yielding the broken sequence
    ``\\textbackslash\\{\\}``.

    Args:
        text: The plain text to escape. Falsy values (``""``, ``None``)
            produce an empty string.

    Returns:
        The text with ``\\ & % $ # _ { } ~ ^`` replaced by LaTeX-safe forms.
    """
    if not text:
        return ""
    return ''.join(_LATEX_SPECIAL_CHARS.get(char, char) for char in text)
def checkbox(value: str) -> str:
    """Map a textual flag to a LaTeX checkbox symbol.

    Args:
        value: Flag text. After lower-casing and trimming, one of
            ``true``, ``yes``, ``ja``, ``x``, ``1`` or ``checked`` counts
            as ticked; anything else (including an empty value) does not.

    Returns:
        ``$\\boxtimes$`` for a ticked box, ``$\\square$`` for an empty one.
    """
    ticked_tokens = ('true', 'yes', 'ja', 'x', '1', 'checked')
    is_ticked = bool(value) and value.lower().strip() in ticked_tokens
    return r'$\boxtimes$' if is_ticked else r'$\square$'
def format_date(date_str: str) -> str:
    """Normalise a date string to DD.MM.YYYY.

    Surrounding whitespace is ignored. The accepted input layouts are tried
    in this order: ``D.M.YYYY``, ``YYYY-MM-DD``, ``DD/MM/YYYY`` and
    ``MM/DD/YYYY`` (so an ambiguous slash date is read day-first). Text that
    matches none of them is returned LaTeX-escaped rather than rejected, so
    free-form entries still reach the form.

    Args:
        date_str: The date text to normalise. Falsy values produce ``""``.

    Returns:
        The zero-padded ``DD.MM.YYYY`` date, or the escaped input text when
        it cannot be parsed as a date.
    """
    from datetime import datetime

    if not date_str:
        return ""

    candidate = date_str.strip()
    for fmt in ('%d.%m.%Y', '%Y-%m-%d', '%d/%m/%Y', '%m/%d/%Y'):
        try:
            parsed = datetime.strptime(candidate, fmt)
        except ValueError:
            continue
        # Build the result from the fields: strftime('%Y') does not
        # zero-pad small years on every platform.
        return f"{parsed.day:02d}.{parsed.month:02d}.{parsed.year:04d}"

    return escape_latex(candidate)
def load_template(template_name: str) -> str:
    """Load a LaTeX template file from the template directory.

    Args:
        template_name: Name of the template without the ``.tex`` extension.

    Returns:
        The template source, decoded as UTF-8.

    Raises:
        FileNotFoundError: If the template does not exist, or if the name
            resolves to a location outside the template directory (for
            example via ``../`` segments or an absolute path).
    """
    template_path = TEMPLATE_DIR / f"{template_name}.tex"

    # Refuse names that escape the template directory; the name may come
    # from a request, and only files below TEMPLATE_DIR are templates.
    template_root = TEMPLATE_DIR.resolve()
    is_inside_root = template_root in template_path.resolve().parents

    if not is_inside_root or not template_path.exists():
        raise FileNotFoundError(f"Template not found: {template_path}")
    return template_path.read_text(encoding='utf-8')
def fill_template(template: str, fields: Dict[str, Any]) -> str:
    """Fill a LaTeX template with field values.

    Placeholders recognised in the template:

    - ``{{field_name}}`` for LaTeX-escaped text values
    - ``{{field_name|raw}}`` for raw values (no escaping)
    - ``{{field_name|checkbox}}`` for checkbox symbols
    - ``{{field_name|date}}`` for date formatting

    The template is scanned exactly once, so text inserted for one field is
    never re-interpreted as a placeholder or stripped afterwards (a raw
    value such as ``\\textbf{{x}}`` survives intact). Placeholders whose
    field is missing, or whose formatter is unknown, are replaced by an
    empty string. A placeholder body may not contain braces, which lets
    ``\\textbf{{{name}}}`` resolve to ``\\textbf{<value>}``.

    Args:
        template: LaTeX source containing placeholders.
        fields: Mapping of field names to values. ``None`` is treated as an
            empty string; other values are converted with ``str()``.

    Returns:
        The template with every placeholder substituted or removed.
    """
    import re

    # Keys are matched by their str() form and None becomes "".
    values = {
        str(key): "" if value is None else str(value)
        for key, value in fields.items()
    }

    def _render(match) -> str:
        """Return the replacement text for one ``{{...}}`` placeholder."""
        token = match.group(1)
        name, separator, formatter = token.rpartition('|')
        if separator and name in values:
            if formatter == 'raw':
                return values[name]
            if formatter == 'checkbox':
                return checkbox(values[name])
            if formatter == 'date':
                return format_date(values[name])
        if token in values:
            return escape_latex(values[token])
        # Unfilled field or unknown formatter: drop the placeholder.
        return ''

    return re.sub(r'\{\{([^{}]+)\}\}', _render, template)
def compile_latex(latex_content: str, output_format: str = 'pdf') -> bytes:
    """Compile LaTeX content to PDF with ``pdflatex``.

    The source is written to ``document.tex`` in a temporary directory and
    ``pdflatex`` is run twice there (the second pass resolves references).
    The directory is removed when the function returns.

    Args:
        latex_content: Complete LaTeX document source.
        output_format: Currently unused; kept for interface compatibility.

    Returns:
        The generated PDF as bytes.

    Raises:
        RuntimeError: If ``pdflatex`` exits with a non-zero status or no
            PDF is produced. The message carries up to three ``!`` error
            lines from the log when available.
        subprocess.TimeoutExpired: If a single pass exceeds 60 seconds.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        workdir = Path(tmpdir)
        tex_file = workdir / "document.tex"
        tex_file.write_text(latex_content, encoding='utf-8')

        command = [
            'pdflatex',
            '-interaction=nonstopmode',
            '-output-directory', tmpdir,
            str(tex_file),
        ]

        # Run pdflatex twice (for references)
        for _ in range(2):
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                # pdflatex wraps its terminal output by byte count and may
                # split multi-byte characters; never fail on decoding it.
                errors='replace',
                timeout=60,
            )
            if result.returncode == 0:
                continue

            # Prefer the "! ..." error lines from the log file.
            log_file = workdir / "document.log"
            if log_file.exists():
                log_text = log_file.read_text(encoding='utf-8', errors='ignore')
                error_lines = [
                    line for line in log_text.split('\n') if line.startswith('!')
                ]
                if error_lines:
                    summary = '; '.join(error_lines[:3])
                    raise RuntimeError(f"LaTeX compilation failed: {summary}")

            # pdflatex reports problems on stdout; stderr is usually empty,
            # so fall back to the tail of stdout for a useful message.
            details = result.stderr.strip()[:500] or result.stdout.strip()[-500:]
            raise RuntimeError(f"LaTeX compilation failed: {details}")

        pdf_file = workdir / "document.pdf"
        if not pdf_file.exists():
            raise RuntimeError("PDF file was not created")
        return pdf_file.read_bytes()
def generate_form(template_name: str, fields: Dict[str, Any]) -> bytes:
    """Render a template with field data and compile it to a PDF.

    Runs the full pipeline: load the template source, substitute the
    placeholders, then compile the result.

    Args:
        template_name: Name of the template (without .tex extension)
        fields: Dictionary of field names to values

    Returns:
        PDF content as bytes
    """
    latex_source = fill_template(load_template(template_name), fields)
    return compile_latex(latex_source)
def list_templates() -> list:
    """List available templates.

    Returns:
        The names (file stems) of all ``*.tex`` files directly inside the
        template directory, sorted alphabetically so the result does not
        depend on filesystem enumeration order. Empty if the directory
        does not exist.
    """
    if not TEMPLATE_DIR.exists():
        return []
    return sorted(path.stem for path in TEMPLATE_DIR.glob("*.tex"))
# CLI Interface
if __name__ == "__main__":
    import argparse

    def _read_fields(spec: str) -> Dict[str, Any]:
        """Parse the --fields argument: inline JSON object or a JSON file path."""
        # Tolerate leading whitespace before an inline JSON object.
        if spec.lstrip().startswith('{'):
            return json.loads(spec)
        # Read as UTF-8 explicitly so non-ASCII field values do not depend
        # on the locale's default encoding.
        with open(spec, 'r', encoding='utf-8') as f:
            return json.load(f)

    parser = argparse.ArgumentParser(description='LaTeX Form Generation Service')
    parser.add_argument('command', choices=['generate', 'list', 'preview'],
                        help='Command to execute')
    parser.add_argument('--template', '-t', help='Template name')
    parser.add_argument('--fields', '-f', help='JSON string or file path with field data')
    parser.add_argument('--output', '-o', help='Output file path')
    args = parser.parse_args()

    if args.command == 'list':
        print(json.dumps(list_templates()))

    elif args.command == 'preview':
        # Output the filled LaTeX source (for debugging)
        if not args.template or not args.fields:
            print("Error: --template and --fields required", file=sys.stderr)
            sys.exit(1)
        try:
            fields = _read_fields(args.fields)
            print(fill_template(load_template(args.template), fields))
        except Exception as e:
            # Top-level CLI boundary: report the problem and exit non-zero.
            print(f"Error: {e}", file=sys.stderr)
            sys.exit(1)

    elif args.command == 'generate':
        if not args.template or not args.fields:
            print("Error: --template and --fields required", file=sys.stderr)
            sys.exit(1)
        try:
            fields = _read_fields(args.fields)
            pdf_bytes = generate_form(args.template, fields)
            if args.output:
                with open(args.output, 'wb') as f:
                    f.write(pdf_bytes)
                print(f"PDF written to {args.output}", file=sys.stderr)
            else:
                # Output base64 encoded PDF to stdout
                print(base64.b64encode(pdf_bytes).decode('ascii'))
        except Exception as e:
            # Top-level CLI boundary: report the problem and exit non-zero.
            print(f"Error: {e}", file=sys.stderr)
            sys.exit(1)