Implement Phase 4: tools, God Mode, and missing features

Backend:
- Add Tavily web search tool wrapper (tools/web_search.py)
- Add PDF reader + ChromaDB vector store tool (tools/pdf_reader.py)
- Bind tools to LLM calls via .bind_tools() in dynamic_graph_builder
- Implement God Mode using LangGraph interrupt_before + MemorySaver
- Add approve/reject/modify API endpoints for God Mode
- Add PDF upload endpoint with ingestion pipeline
- Add persistent run history (CouncilRun model + run_service + API)
- Add Alembic migration for council_runs table
- Enhance WebSocket to emit run_paused and run_resumed events
- Add tests for tools, God Mode, and run history

Frontend:
- Add God Mode approval UI (GodModePanel component)
- Add Auto-Pilot / God Mode toggle in Konferenzzimmer
- Add functional PDF upload handler
- Add Conditional Edge editor (EdgeSettingsPanel component)
- Add edge click selection in ArchitectCanvas
- Update Zustand store with edge selection and update actions
- Update types for God Mode, execution modes, and WS events
- Update API client with God Mode, PDF upload, and blueprint run endpoints
- Update WebSocket hook for paused/resumed events
- Add Vitest config and frontend tests (store, parser, types, API)

https://claude.ai/code/session_017U6idFgaqnYTXzPxA7mxMv
2026-02-21 10:53:12 +00:00 · 2026-02-21 10:53:12 +00:00 · 001649a364
commit 001649a364
parent c6d0c4a636
31 changed files with 2502 additions and 81 deletions
--- a/backend/tools/pdf_reader.py
+++ b/backend/tools/pdf_reader.py
@ -0,0 +1,140 @@
+"""
+PDF Reader Tool — PyPDF + ChromaDB vector store wrapper for agent nodes.
+
+Loads PDF files, splits them into chunks, stores embeddings in a local
+ChromaDB collection, and performs similarity search against queries.
+Storage location comes from CHROMA_PERSIST_DIR (default: ./chroma_db).
+"""
+
+import os
+from typing import List, Optional
+
+from langchain_core.tools import tool
+
+# Cache of ChromaDB collections keyed by collection name. It is filled
+# lazily by _get_chroma_collection() so the persistent client is not
+# re-initialized on every call.
+_collection_cache: dict = {}
+
+
+def _get_chroma_collection(collection_name: str = "council_pdfs"):
+    """Get or create a ChromaDB collection for PDF content."""
+    if collection_name in _collection_cache:
+        return _collection_cache[collection_name]
+
+    import chromadb
+
+    persist_dir = os.environ.get("CHROMA_PERSIST_DIR", "./chroma_db")
+    client = chromadb.PersistentClient(path=persist_dir)
+    collection = client.get_or_create_collection(
+        name=collection_name,
+        metadata={"hnsw:space": "cosine"},
+    )
+    _collection_cache[collection_name] = collection
+    return collection
+
+
+def ingest_pdf(file_path: str, collection_name: str = "council_pdfs") -> int:
+    """
+    Read a PDF file, split into chunks, and store in ChromaDB.
+
+    Args:
+        file_path: Path to the PDF file.
+        collection_name: ChromaDB collection name.
+
+    Returns:
+        Number of chunks ingested.
+    """
+    from pypdf import PdfReader
+
+    reader = PdfReader(file_path)
+    chunks: List[str] = []
+    metadata_list: List[dict] = []
+
+    for page_num, page in enumerate(reader.pages):
+        text = page.extract_text()
+        if not text or not text.strip():
+            continue
+
+        # Split long pages into ~500 character chunks with overlap
+        words = text.split()
+        chunk_size = 100  # words per chunk
+        overlap = 20
+
+        for i in range(0, len(words), chunk_size - overlap):
+            chunk_words = words[i : i + chunk_size]
+            chunk_text = " ".join(chunk_words)
+            if chunk_text.strip():
+                chunks.append(chunk_text)
+                metadata_list.append({
+                    "source": os.path.basename(file_path),
+                    "page": page_num + 1,
+                })
+
+    if not chunks:
+        return 0
+
+    collection = _get_chroma_collection(collection_name)
+
+    # Generate deterministic IDs based on file and chunk position
+    ids = [
+        f"{os.path.basename(file_path)}_chunk_{i}"
+        for i in range(len(chunks))
+    ]
+
+    collection.upsert(
+        documents=chunks,
+        metadatas=metadata_list,
+        ids=ids,
+    )
+
+    return len(chunks)
+
+
+@tool
+def pdf_search(query: str, n_results: int = 5) -> str:
+    """
+    Search the PDF knowledge base for information relevant to a query.
+
+    Args:
+        query: The search query to find relevant PDF content.
+        n_results: Number of results to return (default 5).
+
+    Returns:
+        A formatted string with relevant passages from ingested PDFs.
+    """
+    try:
+        collection = _get_chroma_collection()
+    except Exception as exc:  # noqa: BLE001
+        return f"[PDF Search Error] Could not access vector store: {exc}"
+
+    if collection.count() == 0:
+        return "[PDF Search] No documents have been ingested yet."
+
+    try:
+        results = collection.query(
+            query_texts=[query],
+            n_results=min(n_results, collection.count()),
+        )
+    except Exception as exc:  # noqa: BLE001
+        return f"[PDF Search Error] {exc}"
+
+    documents = results.get("documents", [[]])[0]
+    metadatas = results.get("metadatas", [[]])[0]
+
+    if not documents:
+        return f"No relevant passages found for: {query}"
+
+    formatted = []
+    for i, (doc, meta) in enumerate(zip(documents, metadatas), 1):
+        source = meta.get("source", "unknown")
+        page = meta.get("page", "?")
+        formatted.append(f"{i}. [Source: {source}, Page {page}]\n   {doc}")
+
+    return "\n\n".join(formatted)
+
+
+def create_pdf_search_tool() -> Optional[tool]:
+    """Factory that returns the pdf_search tool if ChromaDB is configured."""
+    persist_dir = os.environ.get("CHROMA_PERSIST_DIR", "./chroma_db")
+    if persist_dir:
+        return pdf_search
+    return None