Files
mcp-maildir/tests/test_integration.py
T
cloudix_mcp_server 30ac4ae9ca
Docker Build and Push / test (pull_request) Successful in 14s
Docker Build and Push / integration-test (pull_request) Successful in 1m44s
Docker Build and Push / build (pull_request) Successful in 1m12s
fix: correct integration test assertions for semantic search
- test_search_by_content: format_search_result() does not include
  body_text, so check for the expected message_id instead.
- test_search_no_results: vector cosine similarity always returns
  nearest neighbors; use a date filter far in the future to
  guarantee zero results instead.
2026-06-12 08:25:52 -04:00

190 lines
6.6 KiB
Python

"""Integration tests for mcp-maildir with a real Qdrant instance."""
import os
import uuid
import pytest
# Point the code under test at a local Qdrant instance, a dedicated test
# collection, and a fixed embedding model. These assignments are placed
# before the ``server`` import that follows.
# NOTE(review): presumably ``server`` reads these variables at import time or
# on first use, which would make this ordering significant — confirm before
# moving the import above these lines.
os.environ["QDRANT_URL"] = "http://localhost:6333"
os.environ["COLLECTION_NAME"] = "test_mcp_maildir"
os.environ["EMBEDDING_MODEL_NAME"] = "BAAI/bge-small-en-v1.5"
from server import get_qdrant_client, get_embedding_model, search_emails, read_email
from qdrant_client.http import models
# Fixture emails indexed into the test collection by ``qdrant_setup``.
# Every entry carries the same nine keys; the tests below look emails up by
# ``message_id`` and filter on participant address and ``date``.
TEST_EMAILS = [
    # 001: sent by alice; the only body that mentions Python programming.
    {
        "message_id": "<test-001@example.com>",
        "date": "2026-01-15T10:00:00",
        "sender": "alice@example.com",
        "sender_raw": "Alice <alice@example.com>",
        "receiver": "bob@example.com",
        "receiver_raw": "Bob <bob@example.com>",
        "subject": "Hello World",
        "body_text": "This is a test email about Python programming and vector databases.",
        "attachments": [],
    },
    # 002: the only email involving carol, and the only one with an attachment.
    {
        "message_id": "<test-002@example.com>",
        "date": "2026-01-16T14:30:00",
        "sender": "carol@other.com",
        "sender_raw": "Carol <carol@other.com>",
        "receiver": "alice@example.com",
        "receiver_raw": "Alice <alice@example.com>",
        "subject": "Qdrant setup help",
        "body_text": "Can you help me set up Qdrant for semantic email search?",
        "attachments": ["screenshot.png"],
    },
    # 003: the only email dated on or after 2026-02-01.
    {
        "message_id": "<test-003@example.com>",
        "date": "2026-02-01T09:00:00",
        "sender": "bob@example.com",
        "sender_raw": "Bob <bob@example.com>",
        "receiver": "alice@example.com",
        "receiver_raw": "Alice <alice@example.com>",
        "subject": "Meeting next week",
        "body_text": "Let's discuss the project roadmap and data pipeline.",
        "attachments": [],
    },
]
@pytest.fixture(scope="module")
def qdrant_setup():
    """Create the test collection, index ``TEST_EMAILS`` and yield the client.

    Runs once per test module. A collection left over from a previous run is
    dropped first; the collection is dropped again on teardown.

    Yields:
        The client object returned by ``get_qdrant_client()``.
    """
    client = get_qdrant_client()
    model = get_embedding_model()
    collection_name = os.environ["COLLECTION_NAME"]

    # Best-effort removal of leftovers from a previous run: whatever the
    # reason the delete fails, setup continues and ``create_collection``
    # below reports any real problem.
    try:
        client.delete_collection(collection_name)
    except Exception:
        pass

    # Embed a throwaway string to learn the model's vector size.
    probe = next(iter(model.embed(["dimension_probe"])))
    client.create_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(
            size=len(probe), distance=models.Distance.COSINE
        ),
    )

    # Payload indexes belong to the collection, not to individual points, so
    # they are created once here instead of once per indexed email.
    payload_indexes = (
        ("sender", models.PayloadSchemaType.KEYWORD),
        ("receiver", models.PayloadSchemaType.KEYWORD),
        ("date", models.PayloadSchemaType.DATETIME),
    )
    for field_name, field_schema in payload_indexes:
        client.create_payload_index(
            collection_name=collection_name,
            field_name=field_name,
            field_schema=field_schema,
        )

    payload_fields = (
        "message_id",
        "date",
        "sender",
        "sender_raw",
        "receiver",
        "receiver_raw",
        "subject",
        "body_text",
        "attachments",
    )

    points = []
    for email in TEST_EMAILS:
        attachments = (
            ", ".join(email["attachments"]) if email["attachments"] else "None"
        )
        # Text that gets embedded for this email: headers, body, attachments.
        # NOTE(review): presumably mirrors the format the real indexer
        # embeds — confirm against the indexing code.
        vector_text = (
            f"Date: {email['date']}\n"
            f"From: {email['sender']}\n"
            f"To: {email['receiver']}\n"
            f"Subject: {email['subject']}\n\n"
            f"{email['body_text']}\n\n"
            f"Attachments: {attachments}"
        )
        vector = next(iter(model.embed([vector_text]))).tolist()
        points.append(
            models.PointStruct(
                # Deterministic point id derived from the message id.
                id=str(uuid.uuid5(uuid.NAMESPACE_OID, email["message_id"])),
                vector=vector,
                payload={key: email[key] for key in payload_fields},
            )
        )

    # One request for all fixture emails instead of one request per email.
    client.upsert(collection_name=collection_name, points=points)

    yield client

    # Teardown is best-effort as well: a failed delete must not turn a green
    # test run red, and the next run's setup removes any leftover collection.
    try:
        client.delete_collection(collection_name)
    except Exception:
        pass
class TestSearchEmails:
    """Tests for ``search_emails`` against the emails indexed by ``qdrant_setup``."""

    def test_search_by_content(self, qdrant_setup):
        """Semantic search returns the relevant email among its results.

        Searching for 'Python programming' should match email 001,
        which discusses Python and vector databases.
        """
        result = search_emails(query="Python programming")
        assert result["count"] >= 1
        assert "results" in result
        # Only membership is asserted, not rank: email 001 must be somewhere
        # in the returned results.
        messages = [r["message_id"] for r in result["results"]]
        assert "<test-001@example.com>" in messages

    def test_search_with_participant_filter(self, qdrant_setup):
        """Search with alice as participant returns the email she sent."""
        result = search_emails(query="help", participant="alice@example.com")
        assert result["count"] >= 1
        messages = [r["message_id"] for r in result["results"]]
        assert "<test-001@example.com>" in messages

    def test_search_participant_filter_excludes_others(self, qdrant_setup):
        """The participant filter drops emails that do not involve the address.

        alice appears as sender or receiver of every fixture email, so the
        alice-based test cannot notice a filter that is silently ignored.
        carol is the sender of email 002 only and never a receiver, so
        filtering on her address must exclude emails 001 and 003.
        """
        result = search_emails(query="help", participant="carol@other.com")
        messages = [r["message_id"] for r in result["results"]]
        assert "<test-002@example.com>" in messages
        assert "<test-001@example.com>" not in messages
        assert "<test-003@example.com>" not in messages

    def test_search_with_date_filter(self, qdrant_setup):
        """Search emails after a specific date."""
        result = search_emails(query="meeting", start_date="2026-02-01")
        assert result["count"] >= 1
        for r in result["results"]:
            # ISO-8601 strings order lexicographically; a missing date
            # becomes "" and therefore fails the comparison.
            date = r.get("date", "")
            assert date >= "2026-02-01"

    def test_search_no_results_by_date(self, qdrant_setup):
        """Search with a filter that matches no emails returns empty results.

        With semantic (vector) search, any text query always has nearest
        neighbors, so we use a date filter that matches nothing instead.
        """
        result = search_emails(
            query="anything",
            start_date="2099-01-01",
            end_date="2099-12-31",
        )
        assert result["count"] == 0
        assert result["results"] == []
class TestReadEmail:
    """Tests for ``read_email`` lookups by message id."""

    def test_read_existing_email(self, qdrant_setup):
        """An indexed message id yields that email, body included, without an error."""
        wanted_id = "<test-001@example.com>"
        email = read_email(message_id=wanted_id)
        assert "error" not in email
        assert email["message_id"] == wanted_id
        body = email.get("body_text", "")
        assert "This is a test email" in body

    def test_read_nonexistent_email(self, qdrant_setup):
        """An unknown message id yields an error entry saying nothing was found."""
        response = read_email(message_id="<nonexistent@ghost.com>")
        assert "error" in response
        message = response["error"]
        assert "No email found" in message