Compare commits

...

5 Commits

Author SHA1 Message Date
Anibal Angulo
bd107a027a Update README 2026-02-22 16:02:46 +00:00
Anibal Angulo
dcc05d697e Add Docker 2026-02-22 15:57:57 +00:00
Anibal Angulo
82764bd60b Add SSE support 2026-02-22 15:52:35 +00:00
Anibal Angulo
54eb6f240c add agent for testing directly 2026-02-22 15:41:16 +00:00
Anibal Angulo
bb19770663 Update MCP defaults 2026-02-22 15:40:59 +00:00
8 changed files with 1793 additions and 4 deletions

9
.dockerignore Normal file
View File

@@ -0,0 +1,9 @@
.git/
.venv/
.ruff_cache/
__pycache__/
*.pyc
.env
agent.py
AGENTS.md
README.md

216
.gitignore vendored Normal file
View File

@@ -0,0 +1,216 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
# Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
# poetry.lock
# poetry.toml
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
# pdm.lock
# pdm.toml
.pdm-python
.pdm-build/
# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
# pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# Redis
*.rdb
*.aof
*.pid
# RabbitMQ
mnesia/
rabbitmq/
rabbitmq-data/
# ActiveMQ
activemq-data/
# SageMath parsed files
*.sage.py
# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
# .idea/
# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/
# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/
# Streamlit
.streamlit/secrets.toml

25
Dockerfile Normal file
View File

@@ -0,0 +1,25 @@
# --- Stage 1: dependency builder ---
FROM python:3.12-slim AS builder
# Bring in the uv/uvx binaries from the official astral-sh image.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /usr/local/bin/
WORKDIR /app
# Copy only the manifests first so the dependency layer is cached
# across source-code changes.
COPY pyproject.toml uv.lock ./
# --frozen: fail if uv.lock is stale; --no-install-project: install
# third-party dependencies only (the project itself is just main.py).
RUN uv sync --no-dev --frozen --no-install-project
COPY main.py .
# --- Stage 2: runtime image (no uv, just the app and its .venv) ---
FROM python:3.12-slim
WORKDIR /app
COPY --from=builder /app /app
# Put the virtualenv's interpreter/scripts first on PATH.
ENV PATH="/app/.venv/bin:$PATH"
# Cloud Run injects PORT (defaults to 8080)
ENV PORT=8080
EXPOSE ${PORT}
# Shell form so ${PORT} is expanded at runtime
CMD python main.py --transport sse --port ${PORT}

View File

@@ -0,0 +1,87 @@
# knowledge-search-mcp
An MCP (Model Context Protocol) server that exposes a `knowledge_search` tool for semantic search over a knowledge base backed by Vertex AI Vector Search and Google Cloud Storage.
## How it works
1. A natural-language query is embedded using a Gemini embedding model.
2. The embedding is sent to a Vertex AI Matching Engine index endpoint to find nearest neighbors.
3. The matched document contents are fetched from a GCS bucket and returned to the caller.
## Prerequisites
- Python ≥ 3.12
- [uv](https://docs.astral.sh/uv/) for dependency management
- A Google Cloud project with:
- A Vertex AI Vector Search index and deployed endpoint
- A GCS bucket containing the indexed document chunks
- Application Default Credentials (or a service account) with appropriate permissions
## Configuration
Create a `.env` file (see `Settings` in `main.py` for all options):
```env
PROJECT_ID=my-gcp-project
LOCATION=us-central1
BUCKET=my-knowledge-bucket
INDEX_NAME=my-index
DEPLOYED_INDEX_ID=my-deployed-index
ENDPOINT_NAME=projects/…/locations/…/indexEndpoints/…
ENDPOINT_DOMAIN=123456789.us-central1-aiplatform.googleapis.com
# optional
EMBEDDING_MODEL=gemini-embedding-001
SEARCH_LIMIT=10
```
## Usage
### Install dependencies
```bash
uv sync
```
### Run the MCP server (stdio)
```bash
uv run python main.py
```
### Run the MCP server (SSE, e.g. for remote clients)
```bash
uv run python main.py --transport sse --port 8080
```
### Run the interactive agent (ADK)
The bundled agent spawns the MCP server as a subprocess and provides a REPL:
```bash
uv run python agent.py
```
Or connect to an already-running SSE server:
```bash
uv run python agent.py --remote http://localhost:8080/sse
```
## Docker
```bash
docker build -t knowledge-search-mcp .
docker run -p 8080:8080 --env-file .env knowledge-search-mcp
```
The container starts the server in SSE mode on the port specified by `PORT` (default `8080`).
## Project structure
```
main.py MCP server, vector search client, and GCS storage helper
agent.py Interactive ADK agent that consumes the MCP server
Dockerfile Multi-stage build for Cloud Run / containerized deployment
pyproject.toml Project metadata and dependencies
```

111
agent.py Normal file
View File

@@ -0,0 +1,111 @@
# ruff: noqa: INP001
"""ADK agent that connects to the knowledge-search MCP server."""
import argparse
import asyncio
import os
from google.adk.agents.llm_agent import LlmAgent
from google.adk.runners import Runner
from google.adk.sessions import InMemorySessionService
from google.adk.tools.mcp_tool import McpToolset
from google.adk.tools.mcp_tool.mcp_session_manager import (
SseConnectionParams,
StdioConnectionParams,
)
from google.genai import types
from mcp import StdioServerParameters
# ADK needs these env vars for Vertex AI; reuse the ones from .env
os.environ.setdefault("GOOGLE_GENAI_USE_VERTEXAI", "True")
# Mirror the server's PROJECT_ID/LOCATION into the variable names ADK
# expects, without clobbering values the user already exported.
if project := os.environ.get("PROJECT_ID"):
    os.environ.setdefault("GOOGLE_CLOUD_PROJECT", project)
if location := os.environ.get("LOCATION"):
    os.environ.setdefault("GOOGLE_CLOUD_LOCATION", location)
# Absolute path to the MCP server script, resolved relative to this file
# so the agent can be launched from any working directory.
SERVER_SCRIPT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "main.py")
def _parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Knowledge Search Agent")
parser.add_argument(
"--remote",
metavar="URL",
help="Connect to an already-running MCP server at this SSE URL "
"(e.g. http://localhost:8080/sse). Without this flag the agent "
"spawns the server as a subprocess.",
)
return parser.parse_args()
async def async_main() -> None:
    """Run the interactive knowledge-search REPL until EOF or Ctrl+C.

    Connects to the knowledge-search MCP server either over SSE
    (``--remote URL``) or by spawning ``main.py`` as a stdio subprocess
    via ``uv run``, then forwards each user query to the LLM agent and
    prints the final response text.
    """
    args = _parse_args()
    if args.remote:
        # Attach to an already-running server over Server-Sent Events.
        connection_params = SseConnectionParams(url=args.remote)
    else:
        # Spawn the local server as a child process speaking MCP on stdio.
        connection_params = StdioConnectionParams(
            server_params=StdioServerParameters(
                command="uv",
                args=["run", "python", SERVER_SCRIPT],
            ),
        )
    toolset = McpToolset(connection_params=connection_params)
    # NOTE(review): if any of the constructions below raise, the finally
    # block is never entered and toolset.close() is skipped — consider
    # widening the try to start right after the toolset is created.
    agent = LlmAgent(
        model="gemini-2.0-flash",
        name="knowledge_agent",
        instruction=(
            "You are a helpful assistant with access to a knowledge base. "
            "Use the knowledge_search tool to find relevant information "
            "when the user asks questions. Summarize the results clearly."
        ),
        tools=[toolset],
    )
    session_service = InMemorySessionService()
    session = await session_service.create_session(
        state={},
        app_name="knowledge_agent",
        user_id="user",
    )
    runner = Runner(
        app_name="knowledge_agent",
        agent=agent,
        session_service=session_service,
    )
    print("Knowledge Search Agent ready. Type your query (Ctrl+C to exit):")
    try:
        while True:
            try:
                # NOTE(review): input() blocks the event loop while waiting
                # for the user — acceptable for this single-user REPL since
                # nothing else is scheduled while idle.
                query = input("\n> ").strip()
            except EOFError:
                # Ctrl+D / closed stdin ends the session cleanly.
                break
            if not query:
                continue
            content = types.Content(
                role="user",
                parts=[types.Part(text=query)],
            )
            # Stream agent events; print only the final text response parts.
            async for event in runner.run_async(
                session_id=session.id,
                user_id=session.user_id,
                new_message=content,
            ):
                if event.is_final_response() and event.content and event.content.parts:
                    for part in event.content.parts:
                        if part.text:
                            print(part.text)
    except KeyboardInterrupt:
        print("\nShutting down...")
    finally:
        # Always tear down the MCP connection / child server process.
        await toolset.close()


if __name__ == "__main__":
    asyncio.run(async_main())

34
main.py
View File

@@ -1,6 +1,7 @@
# ruff: noqa: INP001 # ruff: noqa: INP001
"""Async helpers for querying Vertex AI vector search via MCP.""" """Async helpers for querying Vertex AI vector search via MCP."""
import argparse
import asyncio import asyncio
import io import io
import logging import logging
@@ -244,7 +245,10 @@ class GoogleCloudVectorSearch:
json=payload, json=payload,
headers=headers, headers=headers,
) as response: ) as response:
response.raise_for_status() if not response.ok:
body = await response.text()
msg = f"findNeighbors returned {response.status}: {body}"
raise RuntimeError(msg)
data = await response.json() data = await response.json()
neighbors = data.get("nearestNeighbors", [{}])[0].get("neighbors", []) neighbors = data.get("nearestNeighbors", [{}])[0].get("neighbors", [])
@@ -281,6 +285,8 @@ class GoogleCloudVectorSearch:
class Settings(BaseSettings): class Settings(BaseSettings):
"""Server configuration populated from environment variables.""" """Server configuration populated from environment variables."""
model_config = {"env_file": ".env"}
project_id: str project_id: str
location: str location: str
bucket: str bucket: str
@@ -288,7 +294,7 @@ class Settings(BaseSettings):
deployed_index_id: str deployed_index_id: str
endpoint_name: str endpoint_name: str
endpoint_domain: str endpoint_domain: str
embedding_model: str = "text-embedding-005" embedding_model: str = "gemini-embedding-001"
search_limit: int = 10 search_limit: int = 10
@@ -330,7 +336,26 @@ async def lifespan(_server: FastMCP) -> AsyncIterator[AppContext]:
) )
mcp = FastMCP("knowledge-search", lifespan=lifespan) def _parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser()
parser.add_argument(
"--transport",
choices=["stdio", "sse"],
default="stdio",
)
parser.add_argument("--host", default="0.0.0.0")
parser.add_argument("--port", type=int, default=8080)
return parser.parse_args()
_args = _parse_args()
mcp = FastMCP(
"knowledge-search",
host=_args.host,
port=_args.port,
lifespan=lifespan,
)
@mcp.tool() @mcp.tool()
@@ -359,6 +384,7 @@ async def knowledge_search(
contents=query, contents=query,
config=genai_types.EmbedContentConfig( config=genai_types.EmbedContentConfig(
task_type="RETRIEVAL_QUERY", task_type="RETRIEVAL_QUERY",
), ),
) )
embedding = response.embeddings[0].values embedding = response.embeddings[0].values
@@ -398,4 +424,4 @@ async def knowledge_search(
if __name__ == "__main__": if __name__ == "__main__":
mcp.run() mcp.run(transport=_args.transport)

View File

@@ -16,6 +16,7 @@ dependencies = [
[dependency-groups] [dependency-groups]
dev = [ dev = [
"google-adk>=1.25.1",
"ruff>=0.15.2", "ruff>=0.15.2",
"ty>=0.0.18", "ty>=0.0.18",
] ]

1314
uv.lock generated

File diff suppressed because it is too large Load Diff