Replace unreliable visual overlay mode with precise LaTeX templates: - Add LaTeX template for G2210-11 (Ärztlicher Befundbericht der WAG) - Create Python Flask backend for LaTeX compilation (latex_service.py, server.py) - Add frontend latexService.ts for API communication - Update ReviewPanel with LaTeX mode toggle and preview - Enhance Gemini prompts with G2210-11 specific field extraction - Add Dockerfile with TeX Live for Railway deployment - Update railway.toml to use Docker builder The LaTeX approach ensures accurate field placement and proper formatting for German medical/insurance forms. https://claude.ai/code/session_016pQhdznHZ74Fpkvwr3cLBq
249 lines
7.4 KiB
Python
249 lines
7.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
LaTeX Form Generation Service

This service generates filled PDF forms using LaTeX templates.
It takes extracted field data and compiles a LaTeX template into a PDF.
"""
|
|
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import tempfile
|
|
import base64
|
|
import sys
|
|
import shutil
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Optional
|
|
|
|
# Template directory
|
|
TEMPLATE_DIR = Path(__file__).parent / "templates"
|
|
|
|
|
|
def escape_latex(text: str) -> str:
    """Escape LaTeX special characters in ``text``.

    Each special character is translated exactly once, in a single pass
    over the input. The braces that belong to a replacement (such as the
    empty group that terminates the backslash replacement) are therefore
    never escaped a second time.

    Args:
        text: Plain text that will be embedded in a LaTeX document.

    Returns:
        The escaped text, or an empty string when ``text`` is falsy.
    """
    if not text:
        return ""

    # A translation table maps every source character independently.
    # Chained str.replace calls would re-scan their own output and turn the
    # "{}" emitted for a backslash into "\{\}".
    escape_table = str.maketrans({
        '\\': r'\textbackslash{}',
        '&': r'\&',
        '%': r'\%',
        '$': r'\$',
        '#': r'\#',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
        '~': r'\textasciitilde{}',
        '^': r'\textasciicircum{}',
    })
    return text.translate(escape_table)
|
|
|
|
|
|
def checkbox(value: str) -> str:
    """Map a textual flag to a LaTeX checkbox symbol.

    The value is lower-cased and stripped of surrounding whitespace before
    it is compared against the accepted "checked" markers. Anything else,
    including an empty or missing value, yields the empty box.

    Args:
        value: Textual flag such as "true", "ja" or "x".

    Returns:
        The math-mode crossed box for a checked value, otherwise the
        math-mode empty box.
    """
    unchecked = r'$\square$'
    checked = r'$\boxtimes$'

    if not value:
        return unchecked

    normalized = value.lower().strip()
    checked_markers = ('true', 'yes', 'ja', 'x', '1', 'checked')
    return checked if normalized in checked_markers else unchecked
|
|
|
|
|
|
def format_date(date_str: str) -> str:
    """Normalize a date string to DD.MM.YYYY.

    Surrounding whitespace is ignored. The layouts DD.MM.YYYY, YYYY-MM-DD,
    DD/MM/YYYY and MM/DD/YYYY are tried in that order and the first one
    that parses wins, so an ambiguous slash date such as 03/04/2024 is read
    as day/month. Text that matches none of the layouts is not a date this
    function understands; it is passed through with LaTeX escaping so it
    can still be typeset.

    Args:
        date_str: Date text in one of the supported layouts.

    Returns:
        The date as DD.MM.YYYY, the LaTeX-escaped input when it cannot be
        parsed, or an empty string for empty input.
    """
    from datetime import datetime

    if not date_str:
        return ""

    candidate = date_str.strip()

    known_layouts = ('%d.%m.%Y', '%Y-%m-%d', '%d/%m/%Y', '%m/%d/%Y')
    for layout in known_layouts:
        try:
            parsed = datetime.strptime(candidate, layout)
        except ValueError:
            # Only a layout mismatch is expected here; try the next one.
            continue
        # Built by hand rather than with strftime so the year is always
        # zero-padded to four digits regardless of platform.
        return f"{parsed.day:02d}.{parsed.month:02d}.{parsed.year:04d}"

    return escape_latex(candidate)
|
|
|
|
|
|
def load_template(template_name: str) -> str:
    """Load the LaTeX source of a named template.

    The template is looked up as ``<template_name>.tex`` below
    TEMPLATE_DIR. Names that would point outside that directory (through
    ``..`` components or an absolute path) are treated as missing
    templates rather than being read.

    Args:
        template_name: Name of the template without the ``.tex`` extension.

    Returns:
        The content of the template file, decoded as UTF-8.

    Raises:
        FileNotFoundError: If the name does not refer to an existing file
            inside TEMPLATE_DIR.
    """
    template_path = TEMPLATE_DIR / f"{template_name}.tex"

    # abspath collapses ".." lexically without following symlinks, so a
    # symlinked template inside the directory keeps working while a name
    # that climbs out of the directory is rejected.
    template_root = Path(os.path.abspath(TEMPLATE_DIR))
    normalized_path = Path(os.path.abspath(template_path))
    inside_root = template_root in normalized_path.parents

    if not inside_root or not template_path.is_file():
        raise FileNotFoundError(f"Template not found: {template_path}")

    return template_path.read_text(encoding='utf-8')
|
|
|
|
|
|
def fill_template(template: str, fields: Dict[str, Any]) -> str:
    """Fill a LaTeX template with field values.

    Placeholders recognized in the template:
    - {{field_name}} for LaTeX-escaped text values
    - {{field_name|raw}} for raw values (no escaping)
    - {{field_name|checkbox}} for checkbox symbols
    - {{field_name|date}} for date formatting

    A None value is rendered as empty text. Placeholders that name an
    unknown field or an unknown formatter are removed.

    The template is scanned once. Inserted values are never re-scanned, so
    a value that itself contains ``{{...}}`` is emitted as given instead of
    being substituted or stripped.

    Args:
        template: LaTeX source containing placeholders.
        fields: Mapping of field names to values; values are converted
            with str().

    Returns:
        The template with every placeholder replaced.
    """
    import re

    # Placeholders are text, so keys are compared in their text form.
    values = {
        str(key): "" if value is None else str(value)
        for key, value in fields.items()
    }

    def render(match):
        inner = match.group(1)
        if inner in values:
            return escape_latex(values[inner])

        key, separator, formatter = inner.rpartition('|')
        if separator and key in values:
            text = values[key]
            if formatter == 'raw':
                return text
            if formatter == 'checkbox':
                return checkbox(text)
            if formatter == 'date':
                return format_date(text)

        # Unknown field or formatter: drop the placeholder.
        return ''

    # Braces are excluded from the placeholder body so that a placeholder
    # wrapped in a LaTeX group, e.g. \textbf{{{name}}}, matches only the
    # inner {{name}} and the surrounding group stays balanced.
    return re.sub(r'\{\{([^{}]+)\}\}', render, template)
|
|
|
|
|
|
def compile_latex(latex_content: str, output_format: str = 'pdf') -> bytes:
    """Compile LaTeX source to a PDF with pdflatex.

    The source is written to ``document.tex`` in a temporary directory and
    pdflatex is run on it twice so that cross references can settle. The
    temporary directory is removed when the function returns.

    Args:
        latex_content: Complete LaTeX document source.
        output_format: Currently not used; the result is always a PDF.

    Returns:
        The compiled PDF as bytes.

    Raises:
        RuntimeError: If pdflatex exits with a non-zero status or no PDF
            file was produced.
        subprocess.TimeoutExpired: If a pdflatex run exceeds 60 seconds.
        FileNotFoundError: If the pdflatex executable cannot be found.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        workdir = Path(tmpdir)
        tex_file = workdir / "document.tex"
        tex_file.write_text(latex_content, encoding='utf-8')

        command = [
            'pdflatex',
            '-interaction=nonstopmode',
            '-output-directory', tmpdir,
            str(tex_file),
        ]

        # Run pdflatex twice (for references)
        for _ in range(2):
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                # pdflatex can echo bytes that are invalid in the locale
                # encoding; replace them instead of failing while decoding.
                errors='replace',
                timeout=60,
            )

            if result.returncode != 0:
                log_file = workdir / "document.log"
                if log_file.exists():
                    log_text = log_file.read_text(encoding='utf-8', errors='ignore')
                    # TeX marks error messages with a leading "!".
                    error_lines = [
                        line for line in log_text.split('\n')
                        if line.startswith('!')
                    ]
                    if error_lines:
                        raise RuntimeError(
                            f"LaTeX compilation failed: {'; '.join(error_lines[:3])}"
                        )

                # pdflatex reports on stdout, so stderr is usually empty;
                # fall back to the tail of stdout to keep the message useful.
                if result.stderr.strip():
                    diagnostics = result.stderr[:500]
                else:
                    diagnostics = result.stdout[-500:]
                raise RuntimeError(f"LaTeX compilation failed: {diagnostics}")

        pdf_file = workdir / "document.pdf"
        if not pdf_file.exists():
            raise RuntimeError("PDF file was not created")

        return pdf_file.read_bytes()
|
|
|
|
|
|
def generate_form(template_name: str, fields: Dict[str, Any]) -> bytes:
    """Render a template with field data and compile it to a PDF.

    Chains the three pipeline steps: load the template source, substitute
    the placeholders, and compile the result.

    Args:
        template_name: Name of the template (without .tex extension)
        fields: Dictionary of field names to values

    Returns:
        PDF content as bytes
    """
    return compile_latex(fill_template(load_template(template_name), fields))
|
|
|
|
|
|
def list_templates() -> list:
    """Return the names of the available templates.

    A name is the file name of a ``.tex`` file directly inside
    TEMPLATE_DIR, without its extension. An empty list is returned when
    the template directory does not exist.
    """
    if TEMPLATE_DIR.exists():
        return [tex_path.stem for tex_path in TEMPLATE_DIR.glob("*.tex")]
    return []
|
|
|
|
|
|
# CLI Interface
|
|
def _load_fields(spec: str) -> Dict[str, Any]:
    """Parse the --fields argument: an inline JSON object or a JSON file path."""
    # Leading whitespace is tolerated so that quoted or multi-line inline
    # JSON is not mistaken for a file path.
    if spec.lstrip().startswith('{'):
        return json.loads(spec)
    with open(spec, 'r', encoding='utf-8') as handle:
        return json.load(handle)


def main(argv=None) -> int:
    """Run the command-line interface.

    Commands:
        list: print the available template names as a JSON array.
        preview: print the filled LaTeX source (for debugging).
        generate: compile the filled template and write the PDF to
            --output, or print it base64-encoded to stdout.

    Args:
        argv: Argument list to parse; defaults to the process arguments.

    Returns:
        The process exit status: 0 on success, 1 on any error. Errors are
        reported on stderr.
    """
    import argparse

    parser = argparse.ArgumentParser(description='LaTeX Form Generation Service')
    parser.add_argument('command', choices=['generate', 'list', 'preview'],
                        help='Command to execute')
    parser.add_argument('--template', '-t', help='Template name')
    parser.add_argument('--fields', '-f', help='JSON string or file path with field data')
    parser.add_argument('--output', '-o', help='Output file path')

    args = parser.parse_args(argv)

    if args.command == 'list':
        print(json.dumps(list_templates()))
        return 0

    # Both remaining commands need a template and field data.
    if not args.template or not args.fields:
        print("Error: --template and --fields required", file=sys.stderr)
        return 1

    try:
        fields = _load_fields(args.fields)

        if args.command == 'preview':
            # Output the filled LaTeX source (for debugging)
            print(fill_template(load_template(args.template), fields))
            return 0

        pdf_bytes = generate_form(args.template, fields)

        if args.output:
            with open(args.output, 'wb') as f:
                f.write(pdf_bytes)
            print(f"PDF written to {args.output}", file=sys.stderr)
        else:
            # Output base64 encoded PDF to stdout
            print(base64.b64encode(pdf_bytes).decode('ascii'))
    except Exception as e:
        # Top-level boundary: report any failure as a one-line error
        # instead of a traceback.
        print(f"Error: {e}", file=sys.stderr)
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())
|