Testing CitadelMesh Agents
Comprehensive guide to testing LangGraph agents with unit tests, integration tests, and debugging techniques.
Testing Strategy
Unit Tests → Integration Tests → E2E Tests → Production Monitoring
    ↓              ↓                  ↓              ↓
  Mock all    Real services    Full stack    Real building
Unit Testing
Test State Machine Logic
import pytest
from my_agent import MyAgent, MyState, AgentConfig
@pytest.mark.asyncio
async def test_state_transitions():
    """Drive the full graph from a zeroed state and check the end state.

    Safety checks and telemetry are switched off in the config so that only
    the state machine itself is exercised.
    """
    unit_config = AgentConfig(
        agent_id="test",
        agent_type="test",
        spiffe_id="spiffe://test",
        enable_safety_checks=False,
        enable_telemetry=False,
    )
    agent_under_test = MyAgent(unit_config)
    initial_state = MyState(value=0)

    # NOTE(review): this reads attributes off the ainvoke() result; confirm
    # agent.graph returns a state object rather than a plain dict.
    final_state = await agent_under_test.graph.ainvoke(initial_state)

    assert final_state.value == 10
    assert final_state.error is None
Test Individual Nodes
@pytest.mark.asyncio
async def test_analyze_node(test_config):
    """Exercise the threat-analysis node directly, without running the graph.

    Args:
        test_config: The ``test_config`` pytest fixture. It must be requested
            as a parameter; referring to the bare name inside the test body
            does not give the fixture's return value.
    """
    agent = MyAgent(test_config)
    state = MyState(
        events=[
            {"type": "alert", "severity": "high"},
            {"type": "alert", "severity": "low"},
        ]
    )

    # Call the node coroutine directly instead of going through agent.graph.
    result = await agent._analyze_threat(state)

    assert result.threat_level == ThreatLevel.HIGH
Mock External Dependencies
from unittest.mock import AsyncMock, patch
@pytest.mark.asyncio
async def test_with_mocked_mcp(test_config):
    """Run the door-control node against a mocked MCP client.

    Args:
        test_config: The ``test_config`` pytest fixture. It must be requested
            as a parameter; referring to the bare name inside the test body
            does not give the fixture's return value.
    """
    agent = MyAgent(test_config)

    # Replace the MCP client with an AsyncMock and script its reply.
    agent.mcp_client = AsyncMock()
    agent.mcp_client.unlock_door.return_value = {
        "status": "unlocked",
        "door_id": "test-door",
    }

    state = MyState(doors_to_unlock=["test-door"])
    result = await agent._execute_door_control(state)

    # The node must have asked the client to unlock exactly this door, once.
    agent.mcp_client.unlock_door.assert_called_once_with("test-door")
    assert result.unlocked_doors == ["test-door"]
Integration Testing
Test with Real Services
@pytest.mark.integration
@pytest.mark.asyncio
async def test_with_real_nats():
    """Process one event through an agent pointed at a local NATS server.

    The agent is configured with ``nats://localhost:4222``, started, handed a
    single test event, and always stopped afterwards.
    """
    agent = MyAgent(
        AgentConfig(
            agent_id="integration-test",
            agent_type="security",
            spiffe_id="spiffe://test",
            nats_url="nats://localhost:4222",
        )
    )
    await agent.start()
    try:
        outcome = await agent.process_event(
            CloudEventMessage(
                id="test-1",
                source="/test",
                type="citadel.test.event",
                data={"action": "unlock"},
            )
        )
        assert outcome is not None
    finally:
        # Shut the agent down even when the event handling or assert fails.
        await agent.stop()
Test Policy Integration
@pytest.mark.integration
@pytest.mark.asyncio
async def test_opa_integration(test_config):
    """Check that the safety policy allows a short unlock and denies a long one.

    Assumes an OPA instance is reachable at localhost:8181.

    Args:
        test_config: The ``test_config`` pytest fixture. It must be requested
            as a parameter; referring to the bare name inside the test body
            does not give the fixture's return value.
    """
    agent = MyAgent(test_config)
    # The fixture disables safety checks; turn them back on for this test.
    agent.config.enable_safety_checks = True

    # Test allowed action
    allowed = await agent.check_safety_policy(
        "door_unlock",
        {
            "door_id": "test-door",
            "duration_seconds": 300,
        },
    )
    # Identity comparison: presumably check_safety_policy returns a real
    # bool -- TODO confirm.
    assert allowed is True

    # Test denied action
    allowed = await agent.check_safety_policy(
        "door_unlock",
        {
            "door_id": "test-door",
            "duration_seconds": 1000,  # Exceeds limit
        },
    )
    assert allowed is False
Debugging Techniques
1. State Inspection
async def debug_agent_execution():
    """Step through a graph run, pausing after every streamed chunk.

    Each chunk yielded by ``agent.graph.astream`` is printed, then the
    function waits for Enter before pulling the next one. When a chunk is a
    mapping, the key is shown as the step and the value as the state, so the
    two lines no longer print the same object twice.
    """
    # NOTE(review): test_config must name an AgentConfig instance in scope
    # here, not the pytest fixture function of the same name.
    agent = MyAgent(test_config)
    state = MyState()

    # Use stream to see each step
    async for step in agent.graph.astream(state):
        if isinstance(step, dict):
            # Assumes chunks look like {node_name: state_update}, as in
            # LangGraph's "updates" stream mode -- TODO confirm.
            for node_name, node_state in step.items():
                print(f"Step: {node_name}")
                print(f"State: {node_state}")
        else:
            # Unknown chunk shape: fall back to printing it as-is.
            print(f"Step: {step}")
            print(f"State: {step}")
        input("Press Enter for next step...")
2. Trace Logging
import logging

# Configure the root logger at DEBUG with a timestamp, logger name and level
# on every record.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.DEBUG,
)

# Create the agent after logging is configured; it is expected to log its
# operations through the standard logging module.
agent = MyAgent(test_config)
3. Breakpoint Debugging
async def _analyze_threat(self, state: SecurityState) -> SecurityState:
    """Pause in the debugger when this node runs, then return the state unchanged.

    Uses the built-in ``breakpoint()`` (Python 3.7+), which enters pdb by
    default and honours the ``PYTHONBREAKPOINT`` environment variable, so the
    stop can be disabled or redirected without editing the code.
    """
    breakpoint()  # Debugger stops here
    # Step through code with:
    # n (next line)
    # s (step into function)
    # c (continue)
    # p variable (print variable)
    return state
4. Visual Debugging
# Generate state machine diagram
graph = agent.build_graph()
# Render the Mermaid source once and reuse it for both outputs.
mermaid_source = graph.get_graph().draw_mermaid()
print(mermaid_source)
# Or export to file
# Explicit UTF-8 so the output does not depend on the platform's default
# encoding.
with open("agent_graph.md", "w", encoding="utf-8") as f:
    f.write(f"```mermaid\n{mermaid_source}\n```")
Test Fixtures
Pytest Fixtures
import pytest
from agents.runtime.base_agent import AgentConfig
@pytest.fixture
def test_config():
    """Provide an AgentConfig for tests.

    Safety checks and telemetry are disabled and the log level is DEBUG.
    """
    settings = {
        "agent_id": "test-agent",
        "agent_type": "test",
        "spiffe_id": "spiffe://test",
        "nats_url": "nats://localhost:4222",
        "enable_safety_checks": False,
        "enable_telemetry": False,
        "log_level": "DEBUG",
    }
    return AgentConfig(**settings)
@pytest.fixture
async def test_agent(test_config):
    """Yield a started agent built from ``test_config`` and stop it afterwards."""
    # NOTE(review): this is an async generator fixture under plain
    # @pytest.fixture; confirm the pytest-asyncio mode in use supports that.
    running_agent = MyAgent(test_config)
    await running_agent.start()
    # Everything after the yield is the fixture's teardown.
    yield running_agent
    await running_agent.stop()
@pytest.fixture
def mock_events():
    """Provide two sample ``security.alert`` events: high/lobby and low/parking."""
    alert_specs = (
        ("event-1", "high", "lobby"),
        ("event-2", "low", "parking"),
    )
    return [
        {
            "id": event_id,
            "type": "security.alert",
            "severity": severity,
            "zone": zone,
        }
        for event_id, severity, zone in alert_specs
    ]
# Use fixtures in tests
@pytest.mark.asyncio
async def test_with_fixtures(test_agent, mock_events):
    """Run the graph on the sample events and expect a threat level to be set.

    Both the agent and the events are supplied by fixtures.
    """
    initial_state = SecurityState(events=mock_events)
    final_state = await test_agent.graph.ainvoke(initial_state)

    assert final_state.threat_level is not None
Mock Mode Testing
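Mock mode lets you test without infrastructure. When the configured NATS server cannot be reached, the agent falls back to a mock event bus instead of failing: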
@pytest.mark.asyncio
async def test_mock_mode():
    """Start an agent against an unreachable NATS URL and expect mock mode.

    The agent is stopped in a ``finally`` block so the started instance does
    not outlive the test, even when the assertion fails.
    """
    config = AgentConfig(
        agent_id="mock-test",
        agent_type="test",
        spiffe_id="spiffe://test",
        nats_url="nats://nonexistent:4222",  # Will fall back to mock
    )
    agent = MyAgent(config)
    await agent.start()  # Won't fail, uses mock event bus
    try:
        # Agent runs but doesn't connect to real services.
        # Identity comparison: presumably _mock_mode is a real bool -- TODO
        # confirm.
        assert agent.event_bus._mock_mode is True
    finally:
        await agent.stop()
Performance Testing
Measure Execution Time
import time
@pytest.mark.asyncio
async def test_execution_performance(test_config):
    """Measure the average time of sequential graph runs; require under 100 ms.

    Args:
        test_config: The ``test_config`` pytest fixture. It must be requested
            as a parameter; referring to the bare name inside the test body
            does not give the fixture's return value.
    """
    agent = MyAgent(test_config)
    state = MyState()
    iterations = 100

    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted.
    start = time.perf_counter()
    for _ in range(iterations):
        await agent.graph.ainvoke(state)
    duration = time.perf_counter() - start

    avg_time = duration / iterations
    print(f"Average execution time: {avg_time*1000:.2f}ms")

    # Assert performance requirement
    assert avg_time < 0.1  # Must complete in <100ms
Load Testing
import asyncio
@pytest.mark.asyncio
async def test_concurrent_execution(test_config):
    """Run 100 graph invocations concurrently and check that none report an error.

    Throughput is printed for information only; it is not asserted.

    Args:
        test_config: The ``test_config`` pytest fixture. It must be requested
            as a parameter; referring to the bare name inside the test body
            does not give the fixture's return value.
    """
    agent = MyAgent(test_config)

    async def process_one():
        return await agent.graph.ainvoke(MyState())

    # 100 concurrent executions
    tasks = [process_one() for _ in range(100)]

    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted.
    start = time.perf_counter()
    results = await asyncio.gather(*tasks)
    duration = time.perf_counter() - start

    throughput = len(results) / duration
    print(f"Throughput: {throughput:.2f} req/s")
    assert all(r.error is None for r in results)
CI/CD Integration
GitHub Actions
# .github/workflows/agent-tests.yml
# Runs the agent test suite with coverage on every push and pull request.
name: Agent Tests
on: [push, pull_request]
jobs:
  test:
    runs-on: ubuntu-latest
    # Service containers for the integration tests, published on the same
    # localhost ports the tests use (NATS 4222, OPA 8181).
    services:
      nats:
        image: nats:2.10
        ports:
          - 4222:4222
      # NOTE(review): the OPA image tag is unpinned, unlike NATS. Also confirm
      # the container starts in server mode with its default command.
      opa:
        image: openpolicyagent/opa:latest
        ports:
          - 8181:8181
    steps:
      - uses: actions/checkout@v4
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'
      # NOTE(review): tests that use the `timeout` or `flaky(reruns=...)`
      # markers need their plugins installed as well -- presumably
      # pytest-timeout and pytest-rerunfailures; confirm against the suite.
      - name: Install dependencies
        run: |
          pip install -r src/agents/requirements.txt
          pip install pytest pytest-asyncio pytest-cov
      # Runs the suite and requests both an XML and a terminal coverage report.
      - name: Run tests
        run: |
          pytest src/agents/tests/ \
            --cov=src/agents \
            --cov-report=xml \
            --cov-report=term
      # Uploads the XML coverage report requested in the previous step.
      - name: Upload coverage
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml
Troubleshooting Tests
Issue: AsyncIO warnings
# Fix: Use pytest-asyncio properly
import pytest
@pytest.mark.asyncio
async def test_async_function():
    """Placeholder: put the body of your async test here."""
Issue: Tests hang
# Add timeout
@pytest.mark.asyncio
@pytest.mark.timeout(10)  # 10 second timeout
async def test_with_timeout():
    """Placeholder for a test that might hang.

    NOTE(review): the ``timeout`` marker is presumably provided by the
    pytest-timeout plugin; confirm it is installed.
    """
Issue: Intermittent failures
# Add retries for flaky tests
@pytest.mark.flaky(reruns=3)
@pytest.mark.asyncio
async def test_flaky_operation():
    """Placeholder for a test that sometimes fails.

    NOTE(review): ``flaky(reruns=...)`` is presumably provided by the
    pytest-rerunfailures plugin; confirm it is installed.
    """
Best Practices
- Test in isolation - Unit test nodes individually
- Mock external systems - Use mocks for MCP, NATS, OPA
- Test edge cases - Null values, timeouts, errors
- Integration test sparingly - Only for critical paths
- Use fixtures - Reuse test setup
- Measure coverage - Aim for >80%
- Performance test - Ensure <100ms execution
- Run in CI/CD - Automated testing
Next Steps
- MCP Adapter Basics - Build MCP servers
- Production Deployment - Deploy agents
- Contributing Guide - Contribute to CitadelMesh
Tested agents are reliable agents! Continue to MCP Basics.