Implement Phase 4: tools, God Mode, and missing features

Backend:
- Add Tavily web search tool wrapper (tools/web_search.py)
- Add PDF reader + ChromaDB vector store tool (tools/pdf_reader.py)
- Bind tools to LLM calls via .bind_tools() in dynamic_graph_builder
- Implement God Mode using LangGraph interrupt_before + MemorySaver
- Add approve/reject/modify API endpoints for God Mode
- Add PDF upload endpoint with ingestion pipeline
- Add persistent run history (CouncilRun model + run_service + API)
- Add Alembic migration for council_runs table
- Enhance WebSocket to emit run_paused and run_resumed events
- Add tests for tools, God Mode, and run history

Frontend:
- Add God Mode approval UI (GodModePanel component)
- Add Auto-Pilot / God Mode toggle in Konferenzzimmer
- Add functional PDF upload handler
- Add Conditional Edge editor (EdgeSettingsPanel component)
- Add edge click selection in ArchitectCanvas
- Update Zustand store with edge selection and update actions
- Update types for God Mode, execution modes, and WS events
- Update API client with God Mode, PDF upload, and blueprint run endpoints
- Update WebSocket hook for paused/resumed events
- Add Vitest config and frontend tests (store, parser, types, API)

https://claude.ai/code/session_017U6idFgaqnYTXzPxA7mxMv
This commit is contained in:
Claude 2026-02-21 10:53:12 +00:00
parent c6d0c4a636
commit 001649a364
No known key found for this signature in database
31 changed files with 2502 additions and 81 deletions

View file

@ -0,0 +1,192 @@
"""
Tests for God Mode (interrupt_before) functionality.
All LLM calls are mocked — no real API calls are made in these tests.
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import pytest
from unittest.mock import patch, MagicMock
from state import CouncilState
class TestBuildGraphGodMode:
    """Tests for graph compilation with god mode (interrupt_before)."""

    def _make_simple_blueprint(self):
        """Return a minimal blueprint: a master node linked linearly to a critic node.

        Both nodes have the web search and PDF reader tools switched off.
        """
        return {
            "version": 1,
            "name": "Test Council",
            "nodes": [
                {
                    "id": "master",
                    "label": "Master AI",
                    "systemPrompt": "You are the master writer.",
                    "model": "claude-3-5-sonnet",
                    "tools": {"webSearch": False, "pdfReader": False},
                },
                {
                    "id": "critic",
                    "label": "Critic AI",
                    "systemPrompt": "You are a critic who evaluates and scores drafts.",
                    "model": "claude-3-5-sonnet",
                    "tools": {"webSearch": False, "pdfReader": False},
                },
            ],
            "edges": [
                {"id": "e1", "source": "master", "target": "critic", "type": "linear"},
            ],
        }

    @patch("services.dynamic_graph_builder._get_llm")
    def test_build_graph_with_god_mode_compiles(self, mock_get_llm):
        """God mode graph should compile without error."""
        from services.dynamic_graph_builder import build_graph_from_blueprint

        blueprint = self._make_simple_blueprint()
        # god_mode must be True here: with False this test would merely
        # repeat test_build_graph_without_god_mode and the god-mode build
        # path would never be exercised.
        graph = build_graph_from_blueprint(blueprint, god_mode=True)
        assert graph is not None

    def test_build_graph_without_god_mode(self):
        """Normal graph should compile without interrupt_before."""
        from services.dynamic_graph_builder import build_graph_from_blueprint

        blueprint = self._make_simple_blueprint()
        graph = build_graph_from_blueprint(blueprint, god_mode=False)
        assert graph is not None
class TestGodModeSessionManagement:
    """Tests for god mode session management functions."""

    def test_get_god_mode_state_returns_none_for_unknown_run(self):
        """Asking for the state of an unregistered run id returns None."""
        from services.dynamic_graph_builder import get_god_mode_state

        result = get_god_mode_state("nonexistent-run-id")
        assert result is None

    @pytest.mark.asyncio
    async def test_resume_god_mode_returns_none_for_unknown_run(self):
        """Resuming an unregistered run id returns None."""
        from services.dynamic_graph_builder import resume_god_mode

        result = await resume_god_mode("nonexistent-run-id", action="approve")
        assert result is None

    @pytest.mark.asyncio
    async def test_resume_god_mode_reject_cleans_up(self):
        """Rejecting a paused run returns None and drops its session entry."""
        from services.dynamic_graph_builder import (
            _god_mode_sessions,
            resume_god_mode,
        )

        # Manually insert a fake session
        _god_mode_sessions["test-run"] = {
            "graph": MagicMock(),
            "checkpointer": MagicMock(),
            "thread_config": {"configurable": {"thread_id": "test-run"}},
        }
        try:
            result = await resume_god_mode("test-run", action="reject")
            assert result is None
            assert "test-run" not in _god_mode_sessions
        finally:
            # _god_mode_sessions is module-level state shared by every test
            # in the process; if the call raises or an assertion fails, the
            # fake entry must not leak into later tests.
            _god_mode_sessions.pop("test-run", None)
class TestToolResolution:
    """Checks how ``_resolve_tools`` maps a node's tool config to tool objects."""

    def test_resolve_tools_none_config(self):
        """A missing (None) config resolves to an empty list."""
        from services import dynamic_graph_builder as builder

        assert builder._resolve_tools(None) == []

    def test_resolve_tools_empty_config(self):
        """An empty dict resolves to an empty list."""
        from services import dynamic_graph_builder as builder

        assert builder._resolve_tools({}) == []

    def test_resolve_tools_web_search_only(self):
        """Only the web search flag set: exactly one tool named ``web_search``."""
        from services import dynamic_graph_builder as builder

        resolved = builder._resolve_tools({"webSearch": True, "pdfReader": False})
        assert len(resolved) == 1
        first = resolved[0]
        assert first.name == "web_search"

    def test_resolve_tools_pdf_only(self):
        """Only the PDF reader flag set: exactly one tool named ``pdf_search``."""
        from services import dynamic_graph_builder as builder

        resolved = builder._resolve_tools({"webSearch": False, "pdfReader": True})
        assert len(resolved) == 1
        first = resolved[0]
        assert first.name == "pdf_search"

    def test_resolve_tools_both(self):
        """Both flags set: two tools, one per expected name (order not checked)."""
        from services import dynamic_graph_builder as builder

        resolved = builder._resolve_tools({"webSearch": True, "pdfReader": True})
        assert len(resolved) == 2
        tool_names = set()
        for tool in resolved:
            tool_names.add(tool.name)
        assert tool_names == {"web_search", "pdf_search"}
class TestInvokeWithTools:
    """Tests for the _invoke_with_tools helper."""

    def test_invoke_without_tools_calls_llm_directly(self):
        """With an empty tool list the LLM is invoked once with the raw messages."""
        from services.dynamic_graph_builder import _invoke_with_tools

        mock_llm = MagicMock()
        mock_response = MagicMock()
        mock_response.content = "Test response"
        mock_llm.invoke.return_value = mock_response

        result = _invoke_with_tools(mock_llm, ["msg1", "msg2"], [])

        mock_llm.invoke.assert_called_once_with(["msg1", "msg2"])
        assert result == mock_response

    def test_invoke_with_tools_no_tool_calls(self):
        """A tool-bound response with no tool calls is returned as-is."""
        from services.dynamic_graph_builder import _invoke_with_tools

        mock_llm = MagicMock()
        mock_bound = MagicMock()
        mock_llm.bind_tools.return_value = mock_bound
        mock_response = MagicMock()
        mock_response.tool_calls = []
        mock_response.content = "No tools needed"
        mock_bound.invoke.return_value = mock_response
        mock_tool = MagicMock()
        mock_tool.name = "test_tool"

        result = _invoke_with_tools(mock_llm, ["msg"], [mock_tool])

        assert result == mock_response

    def test_invoke_with_tools_executes_tool_calls(self):
        """A requested tool call is executed and the follow-up response returned."""
        from services.dynamic_graph_builder import _invoke_with_tools

        mock_llm = MagicMock()
        mock_bound = MagicMock()
        mock_llm.bind_tools.return_value = mock_bound

        # First call returns tool_calls
        mock_response_with_tools = MagicMock()
        mock_response_with_tools.tool_calls = [
            {"name": "web_search", "args": {"query": "test"}, "id": "call-1"}
        ]

        # Second call returns final answer
        mock_final_response = MagicMock()
        # Without this, MagicMock auto-creates a truthy ``tool_calls``
        # attribute, so the "final" response would look like it still
        # requests tools to any code that re-checks it.
        mock_final_response.tool_calls = []
        mock_final_response.content = "Final answer"
        mock_bound.invoke.side_effect = [mock_response_with_tools, mock_final_response]

        mock_tool = MagicMock()
        mock_tool.name = "web_search"
        mock_tool.invoke.return_value = "Search results"

        result = _invoke_with_tools(mock_llm, ["msg"], [mock_tool])

        mock_tool.invoke.assert_called_once_with({"query": "test"})
        assert result == mock_final_response

View file

@ -0,0 +1,82 @@
"""
Tests for the run history service and CouncilRun model.
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
class TestCouncilRunModel:
    """Serialization checks for the CouncilRun SQLAlchemy model."""

    def test_to_dict_serialization(self):
        """A fully populated run exposes its fields through ``to_dict()``."""
        from datetime import datetime, timezone
        from models.council_run import CouncilRun

        started = datetime(2026, 1, 1, tzinfo=timezone.utc)
        finished = datetime(2026, 1, 1, 0, 5, tzinfo=timezone.utc)
        field_values = {
            "id": "test-id",
            "blueprint_id": "bp-id",
            "input_topic": "Test topic",
            "status": "completed",
            "execution_mode": "auto-pilot",
            "final_draft": "Final text",
            "critic_score": 8.5,
            "iteration_count": 3,
            "active_node": "done",
            "error": None,
            "created_at": started,
            "completed_at": finished,
        }

        serialized = CouncilRun(**field_values).to_dict()

        assert serialized["id"] == "test-id"
        assert serialized["blueprint_id"] == "bp-id"
        assert serialized["status"] == "completed"
        assert serialized["critic_score"] == 8.5
        assert serialized["iteration_count"] == 3
        assert serialized["created_at"] is not None
        assert serialized["completed_at"] is not None

    def test_to_dict_with_none_timestamps(self):
        """None timestamps stay None in the serialized dict."""
        from models.council_run import CouncilRun

        field_values = {
            "id": "test-id",
            "input_topic": "Test",
            "status": "pending",
            "execution_mode": "god-mode",
            "created_at": None,
            "completed_at": None,
        }

        serialized = CouncilRun(**field_values).to_dict()

        assert serialized["created_at"] is None
        assert serialized["completed_at"] is None
        assert serialized["execution_mode"] == "god-mode"
class TestRunHistoryRoutes:
    """Route-level tests for the run history API."""

    @pytest.mark.asyncio
    async def test_list_runs_empty(self):
        """List runs returns empty list when no runs exist."""
        from api.run_history_routes import list_all_runs

        # Fake DB session: execute() resolves to a result object whose
        # .scalars().all() chain yields no rows.
        empty_result = MagicMock()
        empty_result.scalars.return_value.all.return_value = []
        fake_session = AsyncMock()
        fake_session.execute.return_value = empty_result

        with patch("services.run_service.list_runs") as patched_list_runs:
            patched_list_runs.return_value = []
            runs = await list_all_runs(limit=50, offset=0, session=fake_session)
            assert runs == []

170
backend/tests/test_tools.py Normal file
View file

@ -0,0 +1,170 @@
"""
Tests for agent tools (web search and PDF reader).
All external API calls are mocked — no real calls to Tavily or ChromaDB.
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import pytest
from unittest.mock import patch, MagicMock
class TestWebSearchTool:
    """Exercises the ``web_search`` tool with the Tavily client mocked out."""

    @patch.dict(os.environ, {"TAVILY_API_KEY": ""}, clear=False)
    def test_web_search_returns_error_without_api_key(self):
        """With an empty API key the tool output mentions TAVILY_API_KEY."""
        from tools.web_search import web_search

        output = web_search.invoke({"query": "test query"})
        assert "TAVILY_API_KEY" in output

    @patch.dict(os.environ, {"TAVILY_API_KEY": "test-key"}, clear=False)
    @patch("tools.web_search.TavilyClient")
    def test_web_search_returns_formatted_results(self, mock_client_cls):
        """Title, URL and content of a search hit all appear in the output."""
        from tools.web_search import web_search

        hit = {
            "title": "Test Result",
            "url": "https://example.com",
            "content": "Some content here",
        }
        # The object returned by TavilyClient(...) is the class mock's return_value.
        mock_client_cls.return_value.search.return_value = {"results": [hit]}

        output = web_search.invoke({"query": "test query"})

        assert "Test Result" in output
        assert "https://example.com" in output
        assert "Some content here" in output

    @patch.dict(os.environ, {"TAVILY_API_KEY": "test-key"}, clear=False)
    @patch("tools.web_search.TavilyClient")
    def test_web_search_handles_empty_results(self, mock_client_cls):
        """An empty result list produces a "No results" message."""
        from tools.web_search import web_search

        mock_client_cls.return_value.search.return_value = {"results": []}

        output = web_search.invoke({"query": "obscure query"})

        assert "No results" in output

    @patch.dict(os.environ, {"TAVILY_API_KEY": "test-key"}, clear=False)
    @patch("tools.web_search.TavilyClient")
    def test_web_search_handles_api_error(self, mock_client_cls):
        """A client exception is reported in the output instead of propagating."""
        from tools.web_search import web_search

        mock_client_cls.return_value.search.side_effect = Exception("API rate limit")

        output = web_search.invoke({"query": "test"})

        assert "Error" in output
        assert "rate limit" in output
class TestCreateWebSearchTool:
    """Covers the ``create_web_search_tool`` factory with and without an API key."""

    def test_factory_returns_tool_when_key_set(self):
        """With TAVILY_API_KEY present the factory returns a tool object."""
        with patch.dict(os.environ, {"TAVILY_API_KEY": "test-key"}, clear=False):
            from tools.web_search import create_web_search_tool

            created = create_web_search_tool()
            assert created is not None

    def test_factory_returns_none_when_key_missing(self):
        """With the environment cleared the factory returns None."""
        # clear=True empties os.environ for the duration of the block.
        with patch.dict(os.environ, {}, clear=True):
            from tools.web_search import create_web_search_tool

            created = create_web_search_tool()
            assert created is None
class TestPdfSearchTool:
    """Exercises the ``pdf_search`` tool against a mocked Chroma collection."""

    @patch("tools.pdf_reader._get_chroma_collection")
    def test_pdf_search_empty_collection(self, mock_get_collection):
        """A collection reporting zero entries yields a "No documents" message."""
        from tools.pdf_reader import pdf_search

        mock_get_collection.return_value.count.return_value = 0

        output = pdf_search.invoke({"query": "test query"})

        assert "No documents" in output

    @patch("tools.pdf_reader._get_chroma_collection")
    def test_pdf_search_returns_results(self, mock_get_collection):
        """Source file, passage text and page label all show up in the output."""
        from tools.pdf_reader import pdf_search

        passages = ["First passage about AI.", "Second passage about ML."]
        passage_meta = [
            {"source": "paper.pdf", "page": 1},
            {"source": "paper.pdf", "page": 3},
        ]
        collection = mock_get_collection.return_value
        collection.count.return_value = 3
        # The mocked query result wraps each list in an outer list.
        collection.query.return_value = {
            "documents": [passages],
            "metadatas": [passage_meta],
        }

        output = pdf_search.invoke({"query": "AI concepts"})

        assert "paper.pdf" in output
        assert "First passage" in output
        assert "Page 1" in output

    @patch("tools.pdf_reader._get_chroma_collection")
    def test_pdf_search_handles_error(self, mock_get_collection):
        """A failure obtaining the collection is reported in the output."""
        from tools.pdf_reader import pdf_search

        mock_get_collection.side_effect = Exception("ChromaDB unavailable")

        output = pdf_search.invoke({"query": "test"})

        assert "Error" in output
class TestPdfIngestion:
    """Covers ``ingest_pdf`` with the PDF parser and Chroma collection mocked."""

    @patch("tools.pdf_reader._get_chroma_collection")
    @patch("tools.pdf_reader.PdfReader")
    def test_ingest_pdf_processes_pages(self, mock_pdf_reader_cls, mock_get_collection):
        """Two pages of text give a positive chunk count and exactly one upsert."""
        from tools.pdf_reader import ingest_pdf

        # Mock PDF with 2 pages of text
        page_texts = (
            "This is the first page with some content " * 20,
            "Second page about machine learning " * 20,
        )
        fake_pages = []
        for text in page_texts:
            page = MagicMock()
            page.extract_text.return_value = text
            fake_pages.append(page)
        mock_pdf_reader_cls.return_value.pages = fake_pages
        collection = mock_get_collection.return_value

        chunk_count = ingest_pdf("/tmp/test.pdf")

        assert chunk_count > 0
        collection.upsert.assert_called_once()

    @patch("tools.pdf_reader._get_chroma_collection")
    @patch("tools.pdf_reader.PdfReader")
    def test_ingest_pdf_empty_file(self, mock_pdf_reader_cls, mock_get_collection):
        """A PDF with no pages reports zero chunks."""
        from tools.pdf_reader import ingest_pdf

        mock_pdf_reader_cls.return_value.pages = []

        chunk_count = ingest_pdf("/tmp/empty.pdf")

        assert chunk_count == 0