Testing CitadelMesh Agents

Comprehensive guide to testing LangGraph agents with unit tests, integration tests, and debugging techniques.

Testing Strategy

Unit Tests → Integration Tests → E2E Tests → Production Monitoring
    ↓               ↓                ↓                ↓
 Mock all      Real services     Full stack     Real building
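
The tiers are easiest to keep separate with pytest markers. The examples below use `@pytest.mark.integration`; a minimal conftest.py sketch that registers that marker so pytest doesn't warn about it, letting unit tests run alone with `pytest -m "not integration"`:

# conftest.py — register the custom marker used by the integration tests below
def pytest_configure(config):
    config.addinivalue_line(
        "markers",
        "integration: tests that require real services (NATS, OPA)"
    )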

Unit Testing

Test State Machine Logic

import pytest
from my_agent import MyAgent, MyState, AgentConfig

@pytest.mark.asyncio
async def test_state_transitions():
    """Test state machine transitions"""

    config = AgentConfig(
        agent_id="test",
        agent_type="test",
        spiffe_id="spiffe://test",
        enable_safety_checks=False,  # Disable for unit tests
        enable_telemetry=False
    )

    agent = MyAgent(config)

    # Test initial state
    state = MyState(value=0)

    # Run through graph
    result = await agent.graph.ainvoke(state)

    # Assert final state
    assert result.value == 10
    assert result.error is None

Test Individual Nodes

@pytest.mark.asyncio
async def test_analyze_node(test_config):  # test_config fixture defined below
    """Test single node in isolation"""

    agent = MyAgent(test_config)

    state = MyState(
        events=[
            {"type": "alert", "severity": "high"},
            {"type": "alert", "severity": "low"}
        ]
    )

    # Call node directly
    result = await agent._analyze_threat(state)

    assert result.threat_level == ThreatLevel.HIGH
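
Node-level tests pair naturally with parametrization, which keeps every severity case in one place. A sketch along these lines — the severity-to-threat-level mapping shown is an assumption for illustration:

@pytest.mark.asyncio
@pytest.mark.parametrize("severity,expected", [
    ("high", ThreatLevel.HIGH),  # assumed mapping, adjust to your agent
    ("low", ThreatLevel.LOW),
])
async def test_analyze_node_severities(test_config, severity, expected):
    """Exercise the node once per severity level"""
    agent = MyAgent(test_config)
    state = MyState(events=[{"type": "alert", "severity": severity}])

    result = await agent._analyze_threat(state)

    assert result.threat_level == expected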

Mock External Dependencies

from unittest.mock import AsyncMock

@pytest.mark.asyncio
async def test_with_mocked_mcp(test_config):
    """Test with mocked MCP client"""

    agent = MyAgent(test_config)

    # Mock MCP client
    agent.mcp_client = AsyncMock()
    agent.mcp_client.unlock_door.return_value = {
        "status": "unlocked",
        "door_id": "test-door"
    }

    state = MyState(doors_to_unlock=["test-door"])

    result = await agent._execute_door_control(state)

    # Verify MCP was called
    agent.mcp_client.unlock_door.assert_called_once_with("test-door")
    assert result.unlocked_doors == ["test-door"]
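
Mocks also make failure paths cheap to exercise. A sketch that simulates an MCP timeout — that `_execute_door_control` catches the exception and records it on `state.error` rather than raising is an assumption about your node:

@pytest.mark.asyncio
async def test_mcp_timeout_is_handled(test_config):
    """Failure path: MCP call times out"""
    agent = MyAgent(test_config)
    agent.mcp_client = AsyncMock()
    agent.mcp_client.unlock_door.side_effect = TimeoutError("MCP timed out")

    state = MyState(doors_to_unlock=["test-door"])

    result = await agent._execute_door_control(state)

    # Assumes the node records the failure instead of raising
    assert result.error is not None
    assert result.unlocked_doors == []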

Integration Testing

Test with Real Services

@pytest.mark.integration
@pytest.mark.asyncio
async def test_with_real_nats():
    """Test with real NATS server"""

    config = AgentConfig(
        agent_id="integration-test",
        agent_type="security",
        spiffe_id="spiffe://test",
        nats_url="nats://localhost:4222"
    )

    agent = MyAgent(config)
    await agent.start()

    try:
        # Test event processing
        event = CloudEventMessage(
            id="test-1",
            source="/test",
            type="citadel.test.event",
            data={"action": "unlock"}
        )

        result = await agent.process_event(event)
        assert result is not None

    finally:
        await agent.stop()
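
Integration tests should skip cleanly, not fail, when the backing service isn't running locally. A minimal sketch that probes the NATS port first — the probe helper is our own, not part of any framework:

import socket

import pytest

def nats_available(host="localhost", port=4222) -> bool:
    """Return True if something is listening on the NATS port."""
    try:
        with socket.create_connection((host, port), timeout=1):
            return True
    except OSError:
        return False

requires_nats = pytest.mark.skipif(
    not nats_available(), reason="NATS not running on localhost:4222"
)

@requires_nats
@pytest.mark.integration
@pytest.mark.asyncio
async def test_with_real_nats_guarded():
    ...  # same body as the test above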

Test Policy Integration

@pytest.mark.integration
@pytest.mark.asyncio
async def test_opa_integration(test_config):
    """Test OPA policy checks"""

    # Assumes OPA running at localhost:8181
    agent = MyAgent(test_config)
    agent.config.enable_safety_checks = True

    # Test allowed action
    allowed = await agent.check_safety_policy(
        "door_unlock",
        {
            "door_id": "test-door",
            "duration_seconds": 300
        }
    )

    assert allowed is True

    # Test denied action
    allowed = await agent.check_safety_policy(
        "door_unlock",
        {
            "door_id": "test-door",
            "duration_seconds": 1000  # Exceeds limit
        }
    )

    assert allowed is False
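
Policy checks are a natural fit for parametrization, which keeps the allow/deny cases side by side. A sketch using the two durations from the test above (any other boundary values would be assumptions about your policy):

@pytest.mark.integration
@pytest.mark.asyncio
@pytest.mark.parametrize("duration,expected", [
    (300, True),    # within limit
    (1000, False),  # exceeds limit
])
async def test_door_unlock_policy(test_config, duration, expected):
    """One allow and one deny case per parameter row"""
    agent = MyAgent(test_config)
    agent.config.enable_safety_checks = True

    allowed = await agent.check_safety_policy(
        "door_unlock",
        {"door_id": "test-door", "duration_seconds": duration},
    )

    assert allowed is expected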

Debugging Techniques

1. State Inspection

async def debug_agent_execution():
    """Step through agent execution"""

    agent = MyAgent(test_config)
    state = MyState()

    # Stream node-by-node updates; each chunk maps node name → state update
    async for step in agent.graph.astream(state, stream_mode="updates"):
        for node_name, update in step.items():
            print(f"Node: {node_name}")
            print(f"Update: {update}")
        input("Press Enter for next step...")

2. Trace Logging

import logging

logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

# Agent will log all operations
agent = MyAgent(test_config)
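
Root-level DEBUG logging is noisy, so it helps to quiet chatty dependencies individually. A sketch — the logger names are assumptions; check what actually appears in your output:

import logging

logging.basicConfig(level=logging.DEBUG)

# Keep the agent verbose but silence dependency chatter
# (logger names are illustrative)
logging.getLogger("nats").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)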

3. Breakpoint Debugging

async def _analyze_threat(self, state: SecurityState) -> SecurityState:
    """Analyze with breakpoint"""

    import pdb; pdb.set_trace()  # Debugger stops here (or use the built-in breakpoint())

    # Step through code with:
    #   n          (next line)
    #   s          (step into function)
    #   c          (continue)
    #   p variable (print a variable)

    return state

4. Visual Debugging

# Generate state machine diagram
graph = agent.build_graph()
print(graph.get_graph().draw_mermaid())

# Or export to a file
with open("agent_graph.md", "w") as f:
    f.write("```mermaid\n")
    f.write(graph.get_graph().draw_mermaid())
    f.write("\n```")

Test Fixtures

Pytest Fixtures

import pytest
import pytest_asyncio
from agents.runtime.base_agent import AgentConfig

@pytest.fixture
def test_config():
    """Agent config for testing"""
    return AgentConfig(
        agent_id="test-agent",
        agent_type="test",
        spiffe_id="spiffe://test",
        nats_url="nats://localhost:4222",
        enable_safety_checks=False,
        enable_telemetry=False,
        log_level="DEBUG"
    )

@pytest_asyncio.fixture  # async fixtures need pytest-asyncio's decorator
async def test_agent(test_config):
    """Initialized test agent"""
    agent = MyAgent(test_config)
    await agent.start()
    yield agent
    await agent.stop()

@pytest.fixture
def mock_events():
    """Sample events for testing"""
    return [
        {
            "id": "event-1",
            "type": "security.alert",
            "severity": "high",
            "zone": "lobby"
        },
        {
            "id": "event-2",
            "type": "security.alert",
            "severity": "low",
            "zone": "parking"
        }
    ]

# Use fixtures in tests
@pytest.mark.asyncio
async def test_with_fixtures(test_agent, mock_events):
    """Test using fixtures"""
    state = SecurityState(events=mock_events)
    result = await test_agent.graph.ainvoke(state)
    assert result.threat_level is not None
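
To share these fixtures across test modules, place them in a conftest.py next to the tests; pytest discovers it automatically, so no imports are needed. A sketch of the layout, using the test path from the CI section below:

# src/agents/tests/conftest.py — fixtures defined here are visible to
# every test module in this directory without an import
import pytest
import pytest_asyncio

from agents.runtime.base_agent import AgentConfig

@pytest.fixture
def test_config():
    ...  # same body as above

@pytest_asyncio.fixture
async def test_agent(test_config):
    ...  # same body as above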

Mock Mode Testing

Enable mock mode to test without infrastructure:

@pytest.mark.asyncio
async def test_mock_mode():
    """Test agent in mock mode"""

    config = AgentConfig(
        agent_id="mock-test",
        agent_type="test",
        spiffe_id="spiffe://test",
        nats_url="nats://nonexistent:4222"  # Will fall back to mock
    )

    agent = MyAgent(config)
    await agent.start()  # Won't fail, uses mock event bus

    # Agent runs but doesn't connect to real services
    assert agent.event_bus._mock_mode is True

Performance Testing

Measure Execution Time

import time

@pytest.mark.asyncio
async def test_execution_performance(test_config):
    """Measure agent performance"""

    agent = MyAgent(test_config)
    state = MyState()

    iterations = 100
    start = time.perf_counter()  # monotonic clock for interval timing

    for _ in range(iterations):
        await agent.graph.ainvoke(state)

    duration = time.perf_counter() - start
    avg_time = duration / iterations

    print(f"Average execution time: {avg_time*1000:.2f}ms")

    # Assert performance requirement
    assert avg_time < 0.1  # Must complete in <100ms
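
Averages hide tail latency, so it is worth recording per-iteration timings and asserting on a percentile too. A sketch using only the standard library (the 200ms p95 budget is an illustrative assumption):

import statistics
import time

@pytest.mark.asyncio
async def test_tail_latency(test_config):
    """Assert on the 95th-percentile execution time"""
    agent = MyAgent(test_config)
    timings = []

    for _ in range(100):
        start = time.perf_counter()
        await agent.graph.ainvoke(MyState())
        timings.append(time.perf_counter() - start)

    p95 = statistics.quantiles(timings, n=100)[94]  # 95th percentile
    print(f"p95: {p95*1000:.2f}ms")

    assert p95 < 0.2  # example budget; tune to your requirements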

Load Testing

import asyncio
import time

@pytest.mark.asyncio
async def test_concurrent_execution(test_config):
    """Test agent under load"""

    agent = MyAgent(test_config)

    async def process_one():
        return await agent.graph.ainvoke(MyState())

    # 100 concurrent executions
    tasks = [process_one() for _ in range(100)]
    start = time.perf_counter()

    results = await asyncio.gather(*tasks)

    duration = time.perf_counter() - start
    throughput = len(results) / duration

    print(f"Throughput: {throughput:.2f} req/s")

    assert all(r.error is None for r in results)
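
Unbounded gather fires everything at once, which can overwhelm the agent and mask queueing effects. A semaphore caps in-flight requests so load runs at a controlled concurrency level; a minimal sketch:

import asyncio

@pytest.mark.asyncio
async def test_bounded_load(test_config):
    """Run 100 requests with at most 10 in flight"""
    agent = MyAgent(test_config)
    semaphore = asyncio.Semaphore(10)  # concurrency cap

    async def process_one():
        async with semaphore:
            return await agent.graph.ainvoke(MyState())

    results = await asyncio.gather(*(process_one() for _ in range(100)))

    assert all(r.error is None for r in results)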

CI/CD Integration

GitHub Actions

# .github/workflows/agent-tests.yml
name: Agent Tests

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest

    services:
      nats:
        image: nats:2.10
        ports:
          - 4222:4222

      opa:
        image: openpolicyagent/opa:latest
        ports:
          - 8181:8181

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          pip install -r src/agents/requirements.txt
          pip install pytest pytest-asyncio pytest-cov

      - name: Run tests
        run: |
          pytest src/agents/tests/ \
            --cov=src/agents \
            --cov-report=xml \
            --cov-report=term

      - name: Upload coverage
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml

Troubleshooting Tests

Issue: AsyncIO warnings

# Fix: use pytest-asyncio and mark async tests explicitly
import pytest

@pytest.mark.asyncio
async def test_async_function():
    # Your async test
    pass

Issue: Tests hang

# Fix: add a timeout (requires the pytest-timeout plugin)
@pytest.mark.asyncio
@pytest.mark.timeout(10)  # 10 second timeout
async def test_with_timeout():
    # Test that might hang
    pass

Issue: Intermittent failures

# Fix: retry flaky tests (requires the pytest-rerunfailures plugin)
@pytest.mark.flaky(reruns=3)
@pytest.mark.asyncio
async def test_flaky_operation():
    # Test that sometimes fails
    pass

Best Practices

  1. Test in isolation - Unit test nodes individually
  2. Mock external systems - Use mocks for MCP, NATS, OPA
  3. Test edge cases - Null values, timeouts, errors (see the sketch after this list)
  4. Integration test sparingly - Only for critical paths
  5. Use fixtures - Reuse test setup
  6. Measure coverage - Aim for >80%
  7. Performance test - Ensure <100ms execution
  8. Run in CI/CD - Automated testing
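
For item 3, a sketch exercising the empty-input edge case — whether `_analyze_threat` treats an empty event list as benign is an assumption about your node:

@pytest.mark.asyncio
async def test_analyze_with_no_events(test_config):
    """Edge case: empty input must not crash the node"""
    agent = MyAgent(test_config)

    result = await agent._analyze_threat(MyState(events=[]))

    # Assumes an empty event list yields a benign, error-free result
    assert result.error is None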

Next Steps

Tested agents are reliable agents! Continue to MCP Basics.