feat: Add LaTeX template-based form filling for G2210-11
Replace unreliable visual overlay mode with precise LaTeX templates:

- Add LaTeX template for G2210-11 (Ärztlicher Befundbericht der WAG)
- Create Python Flask backend for LaTeX compilation (latex_service.py, server.py)
- Add frontend latexService.ts for API communication
- Update ReviewPanel with LaTeX mode toggle and preview
- Enhance Gemini prompts with G2210-11-specific field extraction
- Add Dockerfile with TeX Live for Railway deployment
- Update railway.toml to use Docker builder

The LaTeX approach ensures accurate field placement and proper formatting for German medical/insurance forms.

https://claude.ai/code/session_016pQhdznHZ74Fpkvwr3cLBq
This commit is contained in:
parent
adf8f0240e
commit
19e96ef59b
10 changed files with 1557 additions and 32 deletions
249
latex_service.py
Normal file
249
latex_service.py
Normal file
|
|
@ -0,0 +1,249 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
LaTeX Form Generation Service
|
||||
|
||||
This service generates filled PDF forms using LaTeX templates.
|
||||
It takes extracted field data and compiles a LaTeX template into a PDF.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
import base64
|
||||
import sys
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
# Template directory
|
||||
TEMPLATE_DIR = Path(__file__).parent / "templates"
|
||||
|
||||
|
||||
def escape_latex(text: str) -> str:
    r"""Escape special LaTeX characters in text.

    All characters are substituted in a single pass, so the braces that are
    part of an inserted macro (for example the ``{}`` of
    ``\textbackslash{}``) are never escaped a second time.

    Args:
        text: Plain text to embed in a LaTeX document.

    Returns:
        ``text`` with ``\ & % $ # _ { } ~ ^`` replaced by their LaTeX
        escapes, or an empty string when ``text`` is falsy.
    """
    if not text:
        return ""

    # LaTeX special characters that need escaping
    replacements = {
        '\\': r'\textbackslash{}',
        '&': r'\&',
        '%': r'\%',
        '$': r'\$',
        '#': r'\#',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
        '~': r'\textasciitilde{}',
        '^': r'\textasciicircum{}',
    }

    # translate() looks at each input character exactly once; chained
    # str.replace() calls would re-process text produced by earlier
    # replacements and corrupt macros such as \textbackslash{}.
    return text.translate(str.maketrans(replacements))
|
||||
|
||||
|
||||
def checkbox(value: str) -> str:
    """Map a textual flag to a LaTeX checkbox symbol.

    Args:
        value: Flag text. It is lower-cased and stripped before comparison.

    Returns:
        The crossed box ``$\\boxtimes$`` when the normalised value is one of
        ``true``, ``yes``, ``ja``, ``x``, ``1`` or ``checked``; the empty box
        ``$\\square$`` for anything else, including falsy input.
    """
    checked_tokens = {'true', 'yes', 'ja', 'x', '1', 'checked'}
    is_checked = bool(value) and value.lower().strip() in checked_tokens
    return r'$\boxtimes$' if is_checked else r'$\square$'
|
||||
|
||||
|
||||
def format_date(date_str: str) -> str:
    """Ensure date is in DD.MM.YYYY format.

    Surrounding whitespace is ignored when recognising the date.

    Args:
        date_str: Date text in one of the supported notations:
            ``YYYY-MM-DD``, ``D.M.YYYY`` (zero padding optional),
            ``DD/MM/YYYY`` or ``MM/DD/YYYY``. Slash-separated dates are tried
            day-first, then month-first.

    Returns:
        The date as ``DD.MM.YYYY``. A 10-character value with dots at
        positions 2 and 5 is returned LaTeX-escaped without validating its
        digits. Unrecognised input is returned LaTeX-escaped and otherwise
        unchanged. Falsy input yields an empty string.
    """
    if not date_str:
        return ""

    from datetime import datetime

    candidate = date_str.strip()

    # Already in correct format
    if len(candidate) == 10 and candidate[2] == '.' and candidate[5] == '.':
        return escape_latex(candidate)

    # Only ValueError (format mismatch) is expected here; anything else is a
    # genuine bug and must not be swallowed.
    for fmt in ('%Y-%m-%d', '%d.%m.%Y', '%d/%m/%Y', '%m/%d/%Y'):
        try:
            return datetime.strptime(candidate, fmt).strftime('%d.%m.%Y')
        except ValueError:
            continue

    # Unknown notation: keep the caller's text, made safe for LaTeX.
    return escape_latex(date_str)
|
||||
|
||||
|
||||
def load_template(template_name: str) -> str:
    """Load a LaTeX template file.

    Args:
        template_name: Name of the template relative to ``TEMPLATE_DIR``,
            without the ``.tex`` extension.

    Returns:
        The template source, decoded as UTF-8.

    Raises:
        FileNotFoundError: If the template does not exist, or if
            ``template_name`` points outside ``TEMPLATE_DIR`` (for example
            via ``..`` segments or an absolute path).
    """
    template_path = TEMPLATE_DIR / f"{template_name}.tex"

    # Normalise the path lexically (symlinks are not followed) and require it
    # to stay below TEMPLATE_DIR, so a crafted name cannot read other files.
    root = os.path.abspath(TEMPLATE_DIR)
    candidate = os.path.abspath(template_path)
    try:
        inside_root = os.path.commonpath([root, candidate]) == root
    except ValueError:
        # commonpath() refuses to compare paths on different drives.
        inside_root = False
    if not inside_root:
        raise FileNotFoundError(f"Template not found: {template_name}")

    if not template_path.exists():
        raise FileNotFoundError(f"Template not found: {template_path}")

    return template_path.read_text(encoding='utf-8')
|
||||
|
||||
|
||||
def fill_template(template: str, fields: Dict[str, Any]) -> str:
    """
    Fill a LaTeX template with field values.

    Fields can be accessed in template as:
    - {{field_name}} for escaped text values
    - {{field_name|raw}} for raw values (no escaping)
    - {{field_name|checkbox}} for checkbox symbols
    - {{field_name|date}} for date formatting

    Args:
        template: LaTeX source containing ``{{...}}`` placeholders.
        fields: Mapping of field names to values. Values are converted with
            ``str()``; ``None`` is treated as an empty string.

    Returns:
        The template with every known placeholder substituted and every
        remaining placeholder removed.
    """
    import re

    result = template

    # Process each field
    for key, value in fields.items():
        value_str = str(value) if value is not None else ""

        # The filtered forms are replaced before the bare {{key}} form.
        # Raw (no escaping)
        result = result.replace(f'{{{{{key}|raw}}}}', value_str)
        # Checkbox
        result = result.replace(f'{{{{{key}|checkbox}}}}', checkbox(value_str))
        # Date
        result = result.replace(f'{{{{{key}|date}}}}', format_date(value_str))
        # Default (escaped)
        result = result.replace(f'{{{{{key}}}}}', escape_latex(value_str))

    # Clean up any remaining placeholders (unfilled fields).
    # The placeholder body must not contain braces or backslashes:
    #  - excluding "{" keeps the match from starting one brace too early in
    #    "\textbf{{{name}}}", which would otherwise leave "\textbf}" behind;
    #  - excluding "\" leaves ordinary LaTeX groups like "{{\large T}}" alone.
    result = re.sub(r'\{\{[^{}\\]+\}\}', '', result)

    return result
|
||||
|
||||
|
||||
def compile_latex(latex_content: str, output_format: str = 'pdf') -> bytes:
    """
    Compile LaTeX content to PDF.

    The source is written to a temporary directory and ``pdflatex`` is run on
    it twice in non-stop mode; the directory is removed afterwards.

    Args:
        latex_content: Complete LaTeX document source.
        output_format: Accepted for interface compatibility; this function
            does not read it and always produces a PDF.

    Returns:
        The PDF as bytes.

    Raises:
        RuntimeError: If ``pdflatex`` exits with a non-zero status or no PDF
            file was produced.
        subprocess.TimeoutExpired: If a single ``pdflatex`` run exceeds
            60 seconds.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        workdir = Path(tmpdir)
        tex_file = workdir / "document.tex"
        tex_file.write_text(latex_content, encoding='utf-8')

        # Copy any additional files (images, etc.) if needed
        # For now, we just compile the main document

        command = [
            'pdflatex',
            '-interaction=nonstopmode',
            '-output-directory', tmpdir,
            str(tex_file),
        ]

        # Run pdflatex twice (for references)
        for _ in range(2):
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                # TeX console output is not guaranteed to be valid UTF-8;
                # replace undecodable bytes instead of raising
                # UnicodeDecodeError while collecting diagnostics.
                encoding='utf-8',
                errors='replace',
                timeout=60,
            )

            if result.returncode != 0:
                # Check for common errors
                error_log = workdir / "document.log"
                if error_log.exists():
                    log_content = error_log.read_text(encoding='utf-8', errors='ignore')
                    # Extract error lines (TeX prefixes them with "!")
                    errors = [line for line in log_content.split('\n') if line.startswith('!')]
                    if errors:
                        raise RuntimeError(f"LaTeX compilation failed: {'; '.join(errors[:3])}")

                # pdflatex reports on stdout, so stderr is usually empty;
                # fall back to the tail of stdout to keep the message useful.
                if result.stderr.strip():
                    details = result.stderr[:500]
                else:
                    details = result.stdout[-500:]
                raise RuntimeError(f"LaTeX compilation failed: {details}")

        pdf_file = workdir / "document.pdf"
        if not pdf_file.exists():
            raise RuntimeError("PDF file was not created")

        return pdf_file.read_bytes()
|
||||
|
||||
|
||||
def generate_form(template_name: str, fields: Dict[str, Any]) -> bytes:
    """
    Render a template with field data and compile it to a PDF.

    Chains ``load_template`` -> ``fill_template`` -> ``compile_latex``.

    Args:
        template_name: Name of the template (without .tex extension)
        fields: Dictionary of field names to values

    Returns:
        PDF content as bytes
    """
    latex_source = fill_template(load_template(template_name), fields)
    pdf_bytes = compile_latex(latex_source)
    return pdf_bytes
|
||||
|
||||
|
||||
def list_templates() -> list:
    """Return the stems of all ``*.tex`` files directly inside TEMPLATE_DIR.

    Returns:
        Template names without the ``.tex`` extension, in the order the
        directory glob yields them; an empty list when TEMPLATE_DIR does not
        exist.
    """
    names = []
    if TEMPLATE_DIR.exists():
        for tex_path in TEMPLATE_DIR.glob("*.tex"):
            names.append(tex_path.stem)
    return names
|
||||
|
||||
|
||||
# CLI Interface
|
||||
if __name__ == "__main__":
    import argparse

    def _load_fields(spec: str) -> Dict[str, Any]:
        """Parse the --fields argument: inline JSON object or JSON file path."""
        # Tolerate leading whitespace before an inline JSON object.
        if spec.lstrip().startswith('{'):
            return json.loads(spec)
        # JSON files are UTF-8; do not depend on the platform default
        # encoding, which would garble non-ASCII field values.
        with open(spec, 'r', encoding='utf-8') as f:
            return json.load(f)

    parser = argparse.ArgumentParser(description='LaTeX Form Generation Service')
    parser.add_argument('command', choices=['generate', 'list', 'preview'],
                        help='Command to execute')
    parser.add_argument('--template', '-t', help='Template name')
    parser.add_argument('--fields', '-f', help='JSON string or file path with field data')
    parser.add_argument('--output', '-o', help='Output file path')

    args = parser.parse_args()

    if args.command == 'list':
        print(json.dumps(list_templates()))

    else:
        # Both 'preview' and 'generate' need a template and field data.
        if not args.template or not args.fields:
            print("Error: --template and --fields required", file=sys.stderr)
            sys.exit(1)

        # Top-level boundary: report any failure as a one-line message on
        # stderr with exit status 1 instead of a traceback.
        try:
            fields = _load_fields(args.fields)

            if args.command == 'preview':
                # Output the filled LaTeX source (for debugging)
                print(fill_template(load_template(args.template), fields))

            else:
                pdf_bytes = generate_form(args.template, fields)

                if args.output:
                    with open(args.output, 'wb') as f:
                        f.write(pdf_bytes)
                    print(f"PDF written to {args.output}", file=sys.stderr)
                else:
                    # Output base64 encoded PDF to stdout
                    print(base64.b64encode(pdf_bytes).decode('ascii'))

        except Exception as e:
            print(f"Error: {e}", file=sys.stderr)
            sys.exit(1)
|
||||
Loading…
Add table
Add a link
Reference in a new issue