Add unit-test and integration-test CI jobs and gate the Docker build on both.

NOTE(review): the angle-bracket addresses and message IDs in the test data
were missing from the submitted patch and have been reconstructed; confirm
them against the intended fixtures. The post-image blob hashes on the index
lines predate these edits.

diff --git a/.gitea/workflows/docker-build.yaml b/.gitea/workflows/docker-build.yaml
index 9bbe8a4..48cd4ca 100644
--- a/.gitea/workflows/docker-build.yaml
+++ b/.gitea/workflows/docker-build.yaml
@@ -9,11 +9,58 @@ on:
     - cron: '0 0 * * *'
 
 jobs:
-  build:
+  test:
     runs-on: ubuntu-latest
     steps:
-      - name: Checkout
-        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.13"
+      - name: Install dependencies
+        run: |
+          pip install -r requirements.txt pytest
+      - name: Run unit tests
+        run: pytest tests/ -v --ignore=tests/test_integration.py
+
+  integration-test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6
+
+      - name: Start Qdrant
+        run: |
+          docker run -d --name qdrant \
+            --network "container:$(hostname)" \
+            docker.io/qdrant/qdrant:latest
+
+      - name: Wait for Qdrant
+        run: |
+          # -f makes curl fail on HTTP errors; exit 1 if Qdrant never comes up.
+          for i in $(seq 1 30); do
+            curl -sf http://localhost:6333/healthz && echo "QDRANT ready" && exit 0
+            echo "Waiting for Qdrant... ($i/30)"
+            sleep 1
+          done
+          echo "Qdrant did not become ready in time" >&2
+          exit 1
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.13"
+      - name: Install dependencies
+        run: |
+          pip install -r requirements.txt pytest
+      - name: Run integration tests
+        run: pytest tests/test_integration.py -v
+        env:
+          QDRANT_URL: http://localhost:6333
+          COLLECTION_NAME: test_mcp_maildir
+
+  build:
+    runs-on: ubuntu-latest
+    needs: [test, integration-test]
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4
@@ -31,9 +78,6 @@ jobs:
         with:
           images: jcabillot/mcp-maildir
           tags: |
-            #type=ref,event=branch
-            #type=ref,event=pr
-            #type=sha
             type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
 
       - name: Build and push
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..80432c2
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+testpaths = tests
+pythonpath = src
diff --git a/tests/test_integration.py b/tests/test_integration.py
new file mode 100644
index 0000000..dd53c7f
--- /dev/null
+++ b/tests/test_integration.py
@@ -0,0 +1,187 @@
+"""Integration tests for mcp-maildir with a real Qdrant instance."""
+
+import os
+import uuid
+import pytest
+
+# Forced (not setdefault) before importing server: the fixture below drops
+# COLLECTION_NAME, so an inherited value must never point at real data.
+os.environ["QDRANT_URL"] = "http://localhost:6333"
+os.environ["COLLECTION_NAME"] = "test_mcp_maildir"
+os.environ["EMBEDDING_MODEL_NAME"] = "BAAI/bge-small-en-v1.5"
+
+from server import get_qdrant_client, get_embedding_model, search_emails, read_email
+from qdrant_client.http import models
+
+
+# Message IDs must be unique: each point ID is a UUID5 of its message ID.
+TEST_EMAILS = [
+    {
+        "message_id": "<msg-001@example.com>",
+        "date": "2026-01-15T10:00:00",
+        "sender": "alice@example.com",
+        "sender_raw": "Alice <alice@example.com>",
+        "receiver": "bob@example.com",
+        "receiver_raw": "Bob <bob@example.com>",
+        "subject": "Hello World",
+        "body_text": "This is a test email about Python programming and vector databases.",
+        "attachments": [],
+    },
+    {
+        "message_id": "<msg-002@example.com>",
+        "date": "2026-01-16T14:30:00",
+        "sender": "carol@other.com",
+        "sender_raw": "Carol <carol@other.com>",
+        "receiver": "alice@example.com",
+        "receiver_raw": "Alice <alice@example.com>",
+        "subject": "Qdrant setup help",
+        "body_text": "Can you help me set up Qdrant for semantic email search?",
+        "attachments": ["screenshot.png"],
+    },
+    {
+        "message_id": "<msg-003@example.com>",
+        "date": "2026-02-01T09:00:00",
+        "sender": "bob@example.com",
+        "sender_raw": "Bob <bob@example.com>",
+        "receiver": "alice@example.com",
+        "receiver_raw": "Alice <alice@example.com>",
+        "subject": "Meeting next week",
+        "body_text": "Let's discuss the project roadmap and data pipeline.",
+        "attachments": [],
+    },
+]
+
+# Payload indexes created on the test collection (field name -> schema type).
+PAYLOAD_INDEXES = {
+    "sender": models.PayloadSchemaType.KEYWORD,
+    "receiver": models.PayloadSchemaType.KEYWORD,
+    "date": models.PayloadSchemaType.DATETIME,
+}
+
+
+def _vector_text(email):
+    """Return the text that gets embedded for one TEST_EMAILS entry."""
+    attachments = email["attachments"]
+    return (
+        f"Date: {email['date']}\n"
+        f"From: {email['sender']}\n"
+        f"To: {email['receiver']}\n"
+        f"Subject: {email['subject']}\n\n"
+        f"{email['body_text']}\n\n"
+        f"Attachments: {', '.join(attachments) if attachments else 'None'}"
+    )
+
+
+@pytest.fixture(scope="module")
+def qdrant_setup():
+    """Create the test collection and index TEST_EMAILS once per module.
+
+    Yields the Qdrant client; the collection is dropped again on teardown.
+    """
+    client = get_qdrant_client()
+    model = get_embedding_model()
+    collection_name = os.environ["COLLECTION_NAME"]
+
+    # Best-effort cleanup from previous runs; the collection may not exist.
+    try:
+        client.delete_collection(collection_name)
+    except Exception:
+        pass
+
+    # Embed a throwaway string to learn the model's vector size.
+    probe = next(iter(model.embed(["dimension_probe"])))
+    client.create_collection(
+        collection_name=collection_name,
+        vectors_config=models.VectorParams(
+            size=len(probe), distance=models.Distance.COSINE
+        ),
+    )
+
+    # Indexes belong to the collection, so create each once, not per email.
+    for field_name, field_schema in PAYLOAD_INDEXES.items():
+        client.create_payload_index(
+            collection_name=collection_name,
+            field_name=field_name,
+            field_schema=field_schema,
+        )
+
+    # Embed every email in one call and upsert the points as a single batch.
+    vectors = model.embed([_vector_text(email) for email in TEST_EMAILS])
+    client.upsert(
+        collection_name=collection_name,
+        points=[
+            models.PointStruct(
+                id=str(uuid.uuid5(uuid.NAMESPACE_OID, email["message_id"])),
+                vector=vector.tolist(),
+                payload=dict(email),
+            )
+            for email, vector in zip(TEST_EMAILS, vectors)
+        ],
+    )
+
+    yield client
+
+    # Best-effort teardown: a failed drop must not fail the test session.
+    try:
+        client.delete_collection(collection_name)
+    except Exception:
+        pass
+
+
+class TestSearchEmails:
+    def test_search_by_content(self, qdrant_setup):
+        """Semantic search returns the email that matches the query content.
+
+        Searching for 'Python programming' should match email 001
+        which discusses Python and vector databases.
+        """
+        result = search_emails(query="Python programming")
+        assert result["count"] >= 1
+        assert "results" in result
+        # Checks membership, not rank: email 001 must be among the hits.
+        messages = [r["message_id"] for r in result["results"]]
+        assert "<msg-001@example.com>" in messages
+
+    def test_search_with_participant_filter(self, qdrant_setup):
+        """Search emails sent by alice."""
+        result = search_emails(query="help", participant="alice@example.com")
+        assert result["count"] >= 1
+        messages = [r["message_id"] for r in result["results"]]
+        assert "<msg-001@example.com>" in messages
+
+    def test_search_with_date_filter(self, qdrant_setup):
+        """Search emails after a specific date."""
+        result = search_emails(query="meeting", start_date="2026-02-01")
+        assert result["count"] >= 1
+        for r in result["results"]:
+            date = r.get("date", "")
+            assert date >= "2026-02-01"
+
+    def test_search_no_results_by_date(self, qdrant_setup):
+        """Search with a filter that matches no emails returns empty results.
+
+        With semantic (vector) search, any text query always has nearest
+        neighbors, so we use a date filter that matches nothing instead.
+        """
+        result = search_emails(
+            query="anything",
+            start_date="2099-01-01",
+            end_date="2099-12-31",
+        )
+        assert result["count"] == 0
+        assert result["results"] == []
+
+
+class TestReadEmail:
+    def test_read_existing_email(self, qdrant_setup):
+        """Read an email by its message_id."""
+        result = read_email(message_id="<msg-001@example.com>")
+        assert "error" not in result
+        assert result["message_id"] == "<msg-001@example.com>"
+        assert "This is a test email" in result.get("body_text", "")
+
+    def test_read_nonexistent_email(self, qdrant_setup):
+        """Read an email that doesn't exist."""
+        result = read_email(message_id="<nonexistent@example.com>")
+        assert "error" in result
+        assert "No email found" in result["error"]
diff --git a/tests/test_server.py b/tests/test_server.py
new file mode 100644
index 0000000..a4c71d8
--- /dev/null
+++ b/tests/test_server.py
@@ -0,0 +1,112 @@
+"""Unit tests for mcp-maildir server pure functions."""
+
+import os
+
+# Set before importing server; presumably it reads these at import time.
+os.environ["QDRANT_URL"] = "http://localhost:6333"
+os.environ["COLLECTION_NAME"] = "test"
+
+from server import (
+    normalize_email_address,
+    payload_matches_participant,
+    format_search_result,
+)
+
+
+class TestNormalizeEmailAddress:
+    def test_empty_value(self):
+        assert normalize_email_address("") == ""
+        assert normalize_email_address(None) == ""
+
+    def test_simple_email(self):
+        assert normalize_email_address("user@example.com") == "user@example.com"
+
+    def test_display_name_with_email(self):
+        assert normalize_email_address("John Doe <john@example.com>") == "john@example.com"
+
+    def test_whitespace_and_case(self):
+        assert normalize_email_address("  USER@Example.COM  ") == "user@example.com"
+
+    def test_invalid_email_fallback(self):
+        result = normalize_email_address("not-an-email")
+        # Returns stripped lowercase version of the input
+        assert result == "not-an-email"
+
+
+class TestPayloadMatchesParticipant:
+    def test_sender_match_normalized(self):
+        payload = {"sender": "alice@example.com"}
+        assert payload_matches_participant(payload, "alice@example.com") is True
+
+    def test_receiver_match_normalized(self):
+        payload = {"receiver": "bob@example.com"}
+        assert payload_matches_participant(payload, "bob@example.com") is True
+
+    def test_sender_raw_match(self):
+        payload = {"sender_raw": "alice@other.com"}
+        assert payload_matches_participant(payload, "alice@other.com") is True
+
+    def test_no_match(self):
+        payload = {"sender": "alice@example.com", "receiver": "bob@example.com"}
+        assert payload_matches_participant(payload, "carol@example.com") is False
+
+    def test_display_name_in_sender(self):
+        payload = {"sender": "Alice <alice@example.com>"}
+        assert payload_matches_participant(payload, "alice@example.com") is True
+
+    def test_display_name_in_receiver(self):
+        payload = {"receiver": "Bob Smith <bob@example.com>"}
+        assert payload_matches_participant(payload, "bob@example.com") is True
+
+    def test_empty_payload(self):
+        assert payload_matches_participant({}, "someone@example.com") is False
+
+    def test_case_insensitive(self):
+        payload = {"sender": "ALICE@EXAMPLE.COM"}
+        assert payload_matches_participant(payload, "alice@example.com") is True
+
+
+class TestFormatSearchResult:
+    def test_basic_formatting(self):
+        class MockPoint:
+            payload = {
+                "message_id": "<abc@example.com>",
+                "date": "2026-01-15T10:00:00",
+                "sender": "alice@example.com",
+                "receiver": "bob@example.com",
+                "subject": "Hello",
+                "attachments": ["file.pdf"],
+            }
+            score = 0.95
+
+        result = format_search_result(MockPoint())
+        assert result["message_id"] == "<abc@example.com>"
+        assert result["date"] == "2026-01-15T10:00:00"
+        assert result["sender"] == "alice@example.com"
+        assert result["receiver"] == "bob@example.com"
+        assert result["subject"] == "Hello"
+        assert result["attachments"] == ["file.pdf"]
+        assert result["score"] == 0.95
+
+    def test_empty_payload(self):
+        class MockPoint:
+            payload = None
+            score = None
+
+        result = format_search_result(MockPoint())
+        assert result["message_id"] is None
+        assert result["score"] is None
+        assert result["attachments"] == []
+
+    def test_missing_fields(self):
+        class MockPoint:
+            payload = {"message_id": "<123>"}
+            score = 0.5
+
+        result = format_search_result(MockPoint())
+        assert result["message_id"] == "<123>"
+        assert result["date"] is None
+        assert result["sender"] is None
+        assert result["receiver"] is None
+        assert result["subject"] is None
+        assert result["attachments"] == []