Implement Phase 4: tools, God Mode, and missing features
Backend:
- Add Tavily web search tool wrapper (tools/web_search.py)
- Add PDF reader + ChromaDB vector store tool (tools/pdf_reader.py)
- Bind tools to LLM calls via .bind_tools() in dynamic_graph_builder
- Implement God Mode using LangGraph interrupt_before + MemorySaver
- Add approve/reject/modify API endpoints for God Mode
- Add PDF upload endpoint with ingestion pipeline
- Add persistent run history (CouncilRun model + run_service + API)
- Add Alembic migration for council_runs table
- Enhance WebSocket to emit run_paused and run_resumed events
- Add tests for tools, God Mode, and run history

Frontend:
- Add God Mode approval UI (GodModePanel component)
- Add Auto-Pilot / God Mode toggle in Konferenzzimmer
- Add functional PDF upload handler
- Add Conditional Edge editor (EdgeSettingsPanel component)
- Add edge click selection in ArchitectCanvas
- Update Zustand store with edge selection and update actions
- Update types for God Mode, execution modes, and WS events
- Update API client with God Mode, PDF upload, and blueprint run endpoints
- Update WebSocket hook for paused/resumed events
- Add Vitest config and frontend tests (store, parser, types, API)

https://claude.ai/code/session_017U6idFgaqnYTXzPxA7mxMv
This commit is contained in:
parent
c6d0c4a636
commit
001649a364
31 changed files with 2502 additions and 81 deletions
192
backend/tests/test_god_mode.py
Normal file
192
backend/tests/test_god_mode.py
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
"""
|
||||
Tests for God Mode (interrupt_before) functionality.
|
||||
|
||||
All LLM calls are mocked — no real API calls are made in these tests.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from state import CouncilState
|
||||
|
||||
|
||||
class TestBuildGraphGodMode:
    """Tests for graph compilation with god mode (interrupt_before)."""

    def _make_simple_blueprint(self):
        """Return a minimal blueprint: two nodes (master -> critic), no tools enabled."""
        return {
            "version": 1,
            "name": "Test Council",
            "nodes": [
                {
                    "id": "master",
                    "label": "Master AI",
                    "systemPrompt": "You are the master writer.",
                    "model": "claude-3-5-sonnet",
                    "tools": {"webSearch": False, "pdfReader": False},
                },
                {
                    "id": "critic",
                    "label": "Critic AI",
                    "systemPrompt": "You are a critic who evaluates and scores drafts.",
                    "model": "claude-3-5-sonnet",
                    "tools": {"webSearch": False, "pdfReader": False},
                },
            ],
            "edges": [
                {"id": "e1", "source": "master", "target": "critic", "type": "linear"},
            ],
        }

    @patch("services.dynamic_graph_builder._get_llm")
    def test_build_graph_with_god_mode_compiles(self, mock_get_llm):
        """God mode graph should compile without error."""
        from services.dynamic_graph_builder import build_graph_from_blueprint

        blueprint = self._make_simple_blueprint()
        # god_mode must be True here; with False this test only repeats
        # test_build_graph_without_god_mode and never touches the God Mode path.
        graph = build_graph_from_blueprint(blueprint, god_mode=True)
        assert graph is not None

    @patch("services.dynamic_graph_builder._get_llm")
    def test_build_graph_without_god_mode(self, mock_get_llm):
        """Normal graph should compile without interrupt_before."""
        from services.dynamic_graph_builder import build_graph_from_blueprint

        blueprint = self._make_simple_blueprint()
        # _get_llm is patched so this test, like its sibling, cannot reach a
        # real LLM factory while the graph is being built.
        graph = build_graph_from_blueprint(blueprint, god_mode=False)
        assert graph is not None
|
||||
|
||||
|
||||
class TestGodModeSessionManagement:
    """Tests for god mode session management functions."""

    def test_get_god_mode_state_returns_none_for_unknown_run(self):
        """Looking up a run id that was never registered returns None."""
        from services.dynamic_graph_builder import get_god_mode_state

        result = get_god_mode_state("nonexistent-run-id")
        assert result is None

    @pytest.mark.asyncio
    async def test_resume_god_mode_returns_none_for_unknown_run(self):
        """Resuming a run id that was never registered returns None."""
        from services.dynamic_graph_builder import resume_god_mode

        result = await resume_god_mode("nonexistent-run-id", action="approve")
        assert result is None

    @pytest.mark.asyncio
    async def test_resume_god_mode_reject_cleans_up(self):
        """Rejecting a paused run returns None and removes its session entry."""
        from services.dynamic_graph_builder import (
            _god_mode_sessions,
            resume_god_mode,
        )

        # Manually insert a fake session
        _god_mode_sessions["test-run"] = {
            "graph": MagicMock(),
            "checkpointer": MagicMock(),
            "thread_config": {"configurable": {"thread_id": "test-run"}},
        }

        try:
            result = await resume_god_mode("test-run", action="reject")
            assert result is None
            assert "test-run" not in _god_mode_sessions
        finally:
            # _god_mode_sessions is module-level state shared by every test.
            # Remove the fake entry even if the call raised or an assertion
            # failed, so it cannot leak into later tests.
            _god_mode_sessions.pop("test-run", None)
|
||||
|
||||
|
||||
class TestToolResolution:
    """Checks which tools `_resolve_tools` yields for a node's tool config."""

    def test_resolve_tools_none_config(self):
        """A missing (None) config resolves to an empty list."""
        from services.dynamic_graph_builder import _resolve_tools

        resolved = _resolve_tools(None)
        assert resolved == []

    def test_resolve_tools_empty_config(self):
        """An empty dict config resolves to an empty list."""
        from services.dynamic_graph_builder import _resolve_tools

        resolved = _resolve_tools({})
        assert resolved == []

    def test_resolve_tools_web_search_only(self):
        """Only the webSearch flag set -> exactly one tool named web_search."""
        from services.dynamic_graph_builder import _resolve_tools

        config = {"webSearch": True, "pdfReader": False}
        resolved = _resolve_tools(config)
        assert len(resolved) == 1
        first = resolved[0]
        assert first.name == "web_search"

    def test_resolve_tools_pdf_only(self):
        """Only the pdfReader flag set -> exactly one tool named pdf_search."""
        from services.dynamic_graph_builder import _resolve_tools

        config = {"webSearch": False, "pdfReader": True}
        resolved = _resolve_tools(config)
        assert len(resolved) == 1
        first = resolved[0]
        assert first.name == "pdf_search"

    def test_resolve_tools_both(self):
        """Both flags set -> two tools, web_search and pdf_search (any order)."""
        from services.dynamic_graph_builder import _resolve_tools

        config = {"webSearch": True, "pdfReader": True}
        resolved = _resolve_tools(config)
        assert len(resolved) == 2
        # Compare as a set: the order of the returned tools is not asserted.
        tool_names = set(tool.name for tool in resolved)
        assert tool_names == {"web_search", "pdf_search"}
|
||||
|
||||
|
||||
class TestInvokeWithTools:
    """Exercises `_invoke_with_tools` against mocked LLM and tool objects."""

    def test_invoke_without_tools_calls_llm_directly(self):
        """With an empty tool list the LLM itself is invoked once with the messages."""
        from services.dynamic_graph_builder import _invoke_with_tools

        reply = MagicMock()
        reply.content = "Test response"
        llm = MagicMock()
        llm.invoke.return_value = reply

        messages = ["msg1", "msg2"]
        outcome = _invoke_with_tools(llm, messages, [])

        llm.invoke.assert_called_once_with(["msg1", "msg2"])
        assert outcome == reply

    def test_invoke_with_tools_no_tool_calls(self):
        """When the bound LLM requests no tool calls, its reply is returned as-is."""
        from services.dynamic_graph_builder import _invoke_with_tools

        # Reply carrying an explicitly empty tool_calls list.
        reply = MagicMock()
        reply.tool_calls = []
        reply.content = "No tools needed"

        # bind_tools() hands back a separate object whose invoke() gives the reply.
        bound_llm = MagicMock()
        bound_llm.invoke.return_value = reply
        llm = MagicMock()
        llm.bind_tools.return_value = bound_llm

        tool = MagicMock()
        tool.name = "test_tool"

        outcome = _invoke_with_tools(llm, ["msg"], [tool])
        assert outcome == reply

    def test_invoke_with_tools_executes_tool_calls(self):
        """A requested tool call is executed with its args; the follow-up reply is returned."""
        from services.dynamic_graph_builder import _invoke_with_tools

        # First bound-LLM reply asks for a web_search call...
        tool_request = MagicMock()
        tool_request.tool_calls = [
            {"name": "web_search", "args": {"query": "test"}, "id": "call-1"}
        ]
        # ...and the second one is the final answer.
        final_reply = MagicMock()
        final_reply.content = "Final answer"

        bound_llm = MagicMock()
        bound_llm.invoke.side_effect = [tool_request, final_reply]
        llm = MagicMock()
        llm.bind_tools.return_value = bound_llm

        search_tool = MagicMock()
        search_tool.name = "web_search"
        search_tool.invoke.return_value = "Search results"

        outcome = _invoke_with_tools(llm, ["msg"], [search_tool])

        search_tool.invoke.assert_called_once_with({"query": "test"})
        assert outcome == final_reply
|
||||
82
backend/tests/test_run_service.py
Normal file
82
backend/tests/test_run_service.py
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
"""
|
||||
Tests for the run history service and CouncilRun model.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
|
||||
class TestCouncilRunModel:
    """Serialization checks for the CouncilRun model's `to_dict()`."""

    def test_to_dict_serialization(self):
        """A fully populated run round-trips its scalar fields and keeps both timestamps."""
        from datetime import datetime, timezone
        from models.council_run import CouncilRun

        started = datetime(2026, 1, 1, tzinfo=timezone.utc)
        finished = datetime(2026, 1, 1, 0, 5, tzinfo=timezone.utc)
        fields = {
            "id": "test-id",
            "blueprint_id": "bp-id",
            "input_topic": "Test topic",
            "status": "completed",
            "execution_mode": "auto-pilot",
            "final_draft": "Final text",
            "critic_score": 8.5,
            "iteration_count": 3,
            "active_node": "done",
            "error": None,
            "created_at": started,
            "completed_at": finished,
        }
        run = CouncilRun(**fields)

        serialized = run.to_dict()
        assert serialized["id"] == "test-id"
        assert serialized["blueprint_id"] == "bp-id"
        assert serialized["status"] == "completed"
        assert serialized["critic_score"] == 8.5
        assert serialized["iteration_count"] == 3
        assert serialized["created_at"] is not None
        assert serialized["completed_at"] is not None

    def test_to_dict_with_none_timestamps(self):
        """None timestamps serialize as None; execution_mode is passed through."""
        from models.council_run import CouncilRun

        fields = {
            "id": "test-id",
            "input_topic": "Test",
            "status": "pending",
            "execution_mode": "god-mode",
            "created_at": None,
            "completed_at": None,
        }
        run = CouncilRun(**fields)

        serialized = run.to_dict()
        assert serialized["created_at"] is None
        assert serialized["completed_at"] is None
        assert serialized["execution_mode"] == "god-mode"
|
||||
|
||||
|
||||
class TestRunHistoryRoutes:
    """Route-level checks for the run history API."""

    @pytest.mark.asyncio
    async def test_list_runs_empty(self):
        """List runs returns empty list when no runs exist."""
        from api.run_history_routes import list_all_runs

        # Query result whose .scalars().all() is an empty list.
        query_result = MagicMock()
        query_result.scalars.return_value.all.return_value = []

        # Async session stub: awaiting execute() yields the empty result above.
        db_session = AsyncMock()
        db_session.execute.return_value = query_result

        # NOTE(review): this patches the attribute on services.run_service;
        # whether the route looks list_runs up through that module is not
        # visible here - confirm the patch target is the one the route uses.
        with patch("services.run_service.list_runs") as list_runs_stub:
            list_runs_stub.return_value = []
            outcome = await list_all_runs(limit=50, offset=0, session=db_session)
            assert outcome == []
|
||||
170
backend/tests/test_tools.py
Normal file
170
backend/tests/test_tools.py
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
"""
|
||||
Tests for agent tools (web search and PDF reader).
|
||||
|
||||
All external API calls are mocked — no real calls to Tavily or ChromaDB.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
|
||||
class TestWebSearchTool:
    """Behavior of the `web_search` tool with the Tavily client mocked out."""

    @patch.dict(os.environ, {"TAVILY_API_KEY": ""}, clear=False)
    def test_web_search_returns_error_without_api_key(self):
        """With an empty TAVILY_API_KEY the output mentions the variable name."""
        from tools.web_search import web_search

        output = web_search.invoke({"query": "test query"})
        assert "TAVILY_API_KEY" in output

    @patch.dict(os.environ, {"TAVILY_API_KEY": "test-key"}, clear=False)
    @patch("tools.web_search.TavilyClient")
    def test_web_search_returns_formatted_results(self, mock_client_cls):
        """Title, URL and content of a returned hit all appear in the output."""
        hit = {
            "title": "Test Result",
            "url": "https://example.com",
            "content": "Some content here",
        }
        # The client instance the tool constructs returns a single hit.
        mock_client_cls.return_value.search.return_value = {"results": [hit]}

        from tools.web_search import web_search

        output = web_search.invoke({"query": "test query"})
        for expected in ("Test Result", "https://example.com", "Some content here"):
            assert expected in output

    @patch.dict(os.environ, {"TAVILY_API_KEY": "test-key"}, clear=False)
    @patch("tools.web_search.TavilyClient")
    def test_web_search_handles_empty_results(self, mock_client_cls):
        """An empty result list produces a "No results" message."""
        mock_client_cls.return_value.search.return_value = {"results": []}

        from tools.web_search import web_search

        output = web_search.invoke({"query": "obscure query"})
        assert "No results" in output

    @patch.dict(os.environ, {"TAVILY_API_KEY": "test-key"}, clear=False)
    @patch("tools.web_search.TavilyClient")
    def test_web_search_handles_api_error(self, mock_client_cls):
        """A client exception is reported in the output, including its message."""
        mock_client_cls.return_value.search.side_effect = Exception("API rate limit")

        from tools.web_search import web_search

        output = web_search.invoke({"query": "test"})
        assert "Error" in output
        assert "rate limit" in output
|
||||
|
||||
|
||||
class TestCreateWebSearchTool:
    """Checks `create_web_search_tool` against the TAVILY_API_KEY environment variable."""

    @patch.dict(os.environ, {"TAVILY_API_KEY": "test-key"}, clear=False)
    def test_factory_returns_tool_when_key_set(self):
        """With the key present the factory returns something other than None."""
        from tools.web_search import create_web_search_tool

        built = create_web_search_tool()
        assert built is not None

    # clear=True empties the whole environment for the duration of the test.
    @patch.dict(os.environ, {}, clear=True)
    def test_factory_returns_none_when_key_missing(self):
        """With no environment variables at all the factory returns None."""
        from tools.web_search import create_web_search_tool

        built = create_web_search_tool()
        assert built is None
|
||||
|
||||
|
||||
class TestPdfSearchTool:
    """Behavior of the `pdf_search` tool with the Chroma collection mocked out."""

    @patch("tools.pdf_reader._get_chroma_collection")
    def test_pdf_search_empty_collection(self, mock_get_collection):
        """A collection reporting zero items produces a "No documents" message."""
        mock_get_collection.return_value.count.return_value = 0

        from tools.pdf_reader import pdf_search

        output = pdf_search.invoke({"query": "test query"})
        assert "No documents" in output

    @patch("tools.pdf_reader._get_chroma_collection")
    def test_pdf_search_returns_results(self, mock_get_collection):
        """Source file, passage text and page label of a hit appear in the output."""
        passages = ["First passage about AI.", "Second passage about ML."]
        metadata = [
            {"source": "paper.pdf", "page": 1},
            {"source": "paper.pdf", "page": 3},
        ]
        collection = mock_get_collection.return_value
        collection.count.return_value = 3
        # The mocked query result nests each list one level deep.
        collection.query.return_value = {
            "documents": [passages],
            "metadatas": [metadata],
        }

        from tools.pdf_reader import pdf_search

        output = pdf_search.invoke({"query": "AI concepts"})
        for expected in ("paper.pdf", "First passage", "Page 1"):
            assert expected in output

    @patch("tools.pdf_reader._get_chroma_collection")
    def test_pdf_search_handles_error(self, mock_get_collection):
        """A failure obtaining the collection is reported in the output."""
        mock_get_collection.side_effect = Exception("ChromaDB unavailable")

        from tools.pdf_reader import pdf_search

        output = pdf_search.invoke({"query": "test"})
        assert "Error" in output
|
||||
|
||||
|
||||
class TestPdfIngestion:
    """Checks `ingest_pdf` with both the PDF reader and the Chroma collection mocked."""

    @patch("tools.pdf_reader._get_chroma_collection")
    @patch("tools.pdf_reader.PdfReader")
    def test_ingest_pdf_processes_pages(self, mock_pdf_reader_cls, mock_get_collection):
        """Two pages of text yield a positive chunk count and exactly one upsert."""
        # Mock PDF with 2 pages of text
        page_texts = (
            "This is the first page with some content " * 20,
            "Second page about machine learning " * 20,
        )
        fake_pages = []
        for text in page_texts:
            page = MagicMock()
            page.extract_text.return_value = text
            fake_pages.append(page)

        reader = MagicMock()
        reader.pages = fake_pages
        mock_pdf_reader_cls.return_value = reader

        collection = MagicMock()
        mock_get_collection.return_value = collection

        from tools.pdf_reader import ingest_pdf

        chunk_count = ingest_pdf("/tmp/test.pdf")
        assert chunk_count > 0
        collection.upsert.assert_called_once()

    @patch("tools.pdf_reader._get_chroma_collection")
    @patch("tools.pdf_reader.PdfReader")
    def test_ingest_pdf_empty_file(self, mock_pdf_reader_cls, mock_get_collection):
        """A PDF with no pages reports zero ingested chunks."""
        reader = MagicMock()
        reader.pages = []
        mock_pdf_reader_cls.return_value = reader

        from tools.pdf_reader import ingest_pdf

        chunk_count = ingest_pdf("/tmp/empty.pdf")
        assert chunk_count == 0
|
||||
Loading…
Add table
Add a link
Reference in a new issue