First commit

This commit is contained in:
Anibal Angulo
2026-02-18 19:57:43 +00:00
commit a53f8fcf62
115 changed files with 9957 additions and 0 deletions

View File

@@ -0,0 +1 @@
3.12

View File

View File

@@ -0,0 +1,29 @@
[project]
name = "vector-search"
version = "0.1.0"
description = "CLI and client library for creating, deploying, and querying Vertex AI Vector Search indexes"
readme = "README.md"
authors = [
{ name = "Anibal Angulo", email = "a8065384@banorte.com" }
]
requires-python = ">=3.12"
dependencies = [
"embedder",
"file-storage",
"google-cloud-aiplatform>=1.106.0",
"aiohttp>=3.10.11,<4",
"gcloud-aio-auth>=5.3.0",
"google-auth==2.29.0",
"typer>=0.16.1",
]
[project.scripts]
vector-search = "vector_search.cli:app"
[build-system]
requires = ["uv_build>=0.8.3,<0.9.0"]
build-backend = "uv_build"
[tool.uv.sources]
file-storage = { workspace = true }
embedder = { workspace = true }

View File

@@ -0,0 +1,2 @@
def hello() -> str:
    """Return the package's greeting string."""
    greeting = "Hello from vector-search!"
    return greeting

View File

@@ -0,0 +1,62 @@
from abc import ABC, abstractmethod
from typing import List, TypedDict
# One matched item returned by a vector search query.
# Functional TypedDict form; runtime-equivalent to the class-based declaration.
SearchResult = TypedDict(
    "SearchResult",
    {
        "id": str,  # datapoint identifier
        "distance": float,  # distance/similarity score reported by the index
        "content": str,  # document text associated with the datapoint
    },
)
class BaseVectorSearch(ABC):
    """
    Abstract base class for a vector search provider.

    This class defines the standard interface for creating a vector search index
    and running queries against it.
    """

    @abstractmethod
    def create_index(self, name: str, content_path: str, **kwargs) -> None:
        """
        Creates a new vector search index and populates it with the provided content.

        Args:
            name: The desired name for the new index.
            content_path: Path to the data that will be used to populate the
                index, expected to be a JSON file containing a list of objects,
                each with an 'id', 'name', and 'embedding' key.
                NOTE(review): described here as a local file system path, but
                the GoogleCloudVectorSearch implementation is invoked with a
                GCS URI (gs://...) — confirm which contract is intended.
            **kwargs: Additional provider-specific arguments for index creation.
        """
        ...

    @abstractmethod
    def update_index(self, index_name: str, content_path: str, **kwargs) -> None:
        """
        Updates an existing vector search index with new content.

        Args:
            index_name: The name of the index to update.
            content_path: Path to the data that will be used to populate the
                index (same format caveat as in :meth:`create_index`).
            **kwargs: Additional provider-specific arguments for index update.
        """
        ...

    @abstractmethod
    def run_query(
        self, index: str, query: List[float], limit: int
    ) -> List[SearchResult]:
        """
        Runs a similarity search query against the index.

        Args:
            index: Identifier of the index (or deployed index) to query.
                NOTE(review): the concrete GoogleCloudVectorSearch implementation
                names this parameter ``deployed_index_id``, so keyword calls
                written against this base signature will not work on it —
                consider aligning the parameter names.
            query: The embedding vector to use for the search query.
            limit: The maximum number of nearest neighbors to return.

        Returns:
            A list of dictionaries, where each dictionary represents a matched item
            and contains at least the item's 'id' and the search 'distance'.
        """
        ...

View File

@@ -0,0 +1,10 @@
from typer import Typer
from .create import app as create_callback
from .delete import app as delete_callback
from .query import app as query_callback
# Root Typer application with each sub-command mounted under its CLI name.
app = Typer()

_SUBCOMMANDS = (
    (create_callback, "create"),
    (delete_callback, "delete"),
    (query_callback, "query"),
)
for _sub_app, _command_name in _SUBCOMMANDS:
    app.add_typer(_sub_app, name=_command_name)

View File

@@ -0,0 +1,91 @@
"""Create and deploy a Vertex AI Vector Search index."""
from typing import Annotated
import typer
from rich.console import Console
from rag_eval.config import settings as config
from vector_search.vertex_ai import GoogleCloudVectorSearch
app = typer.Typer()


@app.callback(invoke_without_command=True)
def create(
    path: Annotated[
        str,
        typer.Option(
            "--path",
            "-p",
            help="The GCS URI (gs://...) to the directory containing your embedding JSON file(s).",
        ),
    ],
    agent_name: Annotated[
        str,
        typer.Option(
            "--agent",
            "-a",
            help="The name of the agent to create the index for.",
        ),
    ],
):
    """Create and deploy a Vertex AI Vector Search index for a specific agent.

    Looks up the agent's index configuration in settings, creates the index
    from the embeddings at ``path``, and deploys it to a new public endpoint.
    Exits with code 1 on any failure.
    """
    console = Console()
    try:
        console.print(
            f"[bold green]Looking up configuration for agent '{agent_name}'...[/bold green]"
        )
        agent_config = config.agents.get(agent_name)
        if not agent_config:
            console.print(
                f"[bold red]Agent '{agent_name}' not found in settings.[/bold red]"
            )
            raise typer.Exit(code=1)
        if not agent_config.index:
            console.print(
                f"[bold red]Index configuration not found for agent '{agent_name}'.[/bold red]"
            )
            raise typer.Exit(code=1)
        index_config = agent_config.index
        console.print(
            f"[bold green]Initializing Vertex AI client for project '{config.project_id}' in '{config.location}'...[/bold green]"
        )
        vector_search = GoogleCloudVectorSearch(
            project_id=config.project_id,
            location=config.location,
            bucket=config.bucket,
            index_name=index_config.name,
        )
        # The --path help advertises a full GCS URI, but the code previously
        # always prefixed the configured bucket, mangling gs:// input into
        # "gs://<bucket>/gs://...". Accept both a full URI and a bucket-relative path.
        content_path = (
            path if path.startswith("gs://") else f"gs://{config.bucket}/{path}"
        )
        console.print(
            f"[bold green]Starting creation of index '{index_config.name}'...[/bold green]"
        )
        console.print("This may take a while.")
        vector_search.create_index(
            name=index_config.name,
            content_path=content_path,
            dimensions=index_config.dimensions,
        )
        console.print(
            f"[bold green]Index '{index_config.name}' created successfully.[/bold green]"
        )
        console.print("[bold green]Deploying index to a new endpoint...[/bold green]")
        console.print("This will also take some time.")
        vector_search.deploy_index(
            index_name=index_config.name, machine_type=index_config.machine_type
        )
        console.print("[bold green]Index deployed successfully![/bold green]")
        console.print(f"Endpoint name: {vector_search.index_endpoint.display_name}")
        console.print(
            f"Endpoint resource name: {vector_search.index_endpoint.resource_name}"
        )
    except typer.Exit:
        # typer.Exit is an Exception subclass (via click/RuntimeError); without
        # this re-raise the explicit exits above were swallowed by the generic
        # handler below and misreported as "An error occurred".
        raise
    except Exception as e:
        console.print(f"[bold red]An error occurred: {e}[/bold red]")
        raise typer.Exit(code=1)

View File

@@ -0,0 +1,38 @@
"""Delete a vector index or endpoint."""
import typer
from rich.console import Console
from rag_eval.config import settings as config
from vector_search.vertex_ai import GoogleCloudVectorSearch
app = typer.Typer()


@app.callback(invoke_without_command=True)
def delete(
    id: str = typer.Argument(..., help="The ID of the index or endpoint to delete."),
    endpoint: bool = typer.Option(
        False, "--endpoint", help="Delete an endpoint instead of an index."
    ),
):
    """Delete a vector index or endpoint.

    Deletes the Vertex AI index named by ``id``, or — with ``--endpoint`` —
    undeploys and deletes the endpoint instead. Exits with code 1 on failure.
    """
    console = Console()
    try:
        # Client construction runs aiplatform.init and can itself fail (bad
        # credentials/project); it was previously outside the try, leaking a
        # raw traceback instead of the friendly error the other commands show.
        vector_search = GoogleCloudVectorSearch(
            project_id=config.project_id, location=config.location, bucket=config.bucket
        )
        if endpoint:
            console.print(f"[bold red]Deleting endpoint {id}...[/bold red]")
            vector_search.delete_index_endpoint(id)
            console.print(
                f"[bold green]Endpoint {id} deleted successfully.[/bold green]"
            )
        else:
            console.print(f"[bold red]Deleting index {id}...[/bold red]")
            vector_search.delete_index(id)
            console.print(f"[bold green]Index {id} deleted successfully.[/bold green]")
    except Exception as e:
        console.print(f"[bold red]An error occurred: {e}[/bold red]")
        raise typer.Exit(code=1)

View File

@@ -0,0 +1,91 @@
"""Generate embeddings for documents and save them to a JSON file."""
import json
from pathlib import Path
import typer
from embedder.vertex_ai import VertexAIEmbedder
from file_storage.google_cloud import GoogleCloudFileStorage
from rich.console import Console
from rich.progress import Progress
from rag_eval.config import Settings
app = typer.Typer()


@app.callback(invoke_without_command=True)
def generate(
    path: str = typer.Argument(..., help="The path to the markdown files."),
    output_file: str = typer.Option(
        ...,
        "--output-file",
        "-o",
        help="The local path to save the output JSON file.",
    ),
    batch_size: int = typer.Option(
        10,
        "--batch-size",
        "-b",
        help="The batch size for processing files.",
    ),
    jsonl: bool = typer.Option(
        False,
        "--jsonl",
        help="Output in JSONL format instead of JSON.",
    ),
):
    """Generate embeddings for documents and save them to a JSON file.

    Reads files from Google Cloud Storage under ``path``, embeds them in
    batches of ``batch_size``, and writes ``{"id", "embedding"}`` records to
    ``output_file`` — a JSON array by default, one record per line with
    ``--jsonl``. Exits with code 1 if embedding generation fails.
    """
    config = Settings()
    console = Console()
    console.print("[bold green]Starting vector generation...[/bold green]")
    # Defined before the try so the write phase below can never hit a NameError.
    results = []
    try:
        storage = GoogleCloudFileStorage(bucket=config.bucket)
        embedder = VertexAIEmbedder(model_name=config.embedding_model)
        remote_files = storage.list_files(path=path)
        with Progress(console=console) as progress:
            task = progress.add_task(
                "[cyan]Generating embeddings...", total=len(remote_files)
            )
            for start in range(0, len(remote_files), batch_size):
                batch_files = remote_files[start : start + batch_size]
                batch_contents = []
                for remote_file in batch_files:
                    file_stream = storage.get_file_stream(remote_file)
                    # utf-8-sig strips a BOM if present; replace (rather than
                    # raise) so one malformed file cannot abort the whole run.
                    batch_contents.append(
                        file_stream.read().decode("utf-8-sig", errors="replace")
                    )
                batch_embeddings = embedder.generate_embeddings_batch(batch_contents)
                for remote_file, embedding in zip(batch_files, batch_embeddings):
                    results.append({"id": remote_file, "embedding": embedding})
                    # Advance once per file so the bar tracks the file total.
                    progress.update(task, advance=1)
    except Exception as e:
        console.print(
            f"[bold red]An error occurred during vector generation: {e}[/bold red]"
        )
        raise typer.Exit(code=1)
    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding so output is identical regardless of platform default.
    with open(output_path, "w", encoding="utf-8") as f:
        if jsonl:
            for record in results:
                f.write(json.dumps(record) + "\n")
        else:
            json.dump(results, f, indent=2)
    console.print(
        f"[bold green]Embedding generation complete. {len(results)} vectors saved to '{output_path.resolve()}'[/bold green]"
    )

View File

@@ -0,0 +1,55 @@
"""Query the vector search index."""
import typer
from embedder.vertex_ai import VertexAIEmbedder
from rich.console import Console
from rich.table import Table
from typer import Argument, Option
from rag_eval.config import settings as config
from vector_search.vertex_ai import GoogleCloudVectorSearch
app = typer.Typer()


@app.callback(invoke_without_command=True)
def query(
    query: str = Argument(..., help="The text query to search for."),
    limit: int = Option(5, "--limit", "-l", help="The number of results to return."),
):
    """Queries the vector search index."""
    console = Console()
    try:
        console.print("[bold green]Initializing clients...[/bold green]")
        text_embedder = VertexAIEmbedder(model_name=config.embedding_model)
        search_client = GoogleCloudVectorSearch(
            project_id=config.project_id, location=config.location, bucket=config.bucket
        )

        console.print("[bold green]Loading index endpoint...[/bold green]")
        search_client.load_index_endpoint(config.index.endpoint)

        console.print("[bold green]Generating embedding for query...[/bold green]")
        embedding = text_embedder.generate_embedding(query)

        console.print("[bold green]Running search query...[/bold green]")
        matches = search_client.run_query(
            deployed_index_id=config.index.deployment,
            query=embedding,
            limit=limit,
        )

        # Render the matches as a three-column table.
        results_table = Table(title="Search Results")
        for header, colour in (
            ("ID", "cyan"),
            ("Distance", "magenta"),
            ("Content", "green"),
        ):
            results_table.add_column(header, justify="left", style=colour)
        for match in matches:
            results_table.add_row(match["id"], str(match["distance"]), match["content"])
        console.print(results_table)
    except Exception as e:
        console.print(f"[bold red]An error occurred: {e}[/bold red]")
        raise typer.Exit(code=1)

View File

@@ -0,0 +1,255 @@
import asyncio
from typing import List
from uuid import uuid4
import aiohttp
import google.auth
import google.auth.transport.requests
from file_storage.google_cloud import GoogleCloudFileStorage
from gcloud.aio.auth import Token
from google.cloud import aiplatform
from .base import BaseVectorSearch, SearchResult
class GoogleCloudVectorSearch(BaseVectorSearch):
    """
    A vector search provider that uses Google Cloud's Vertex AI Vector Search.

    Synchronous operations use the google-cloud-aiplatform SDK;
    :meth:`async_run_query` calls the endpoint's public REST API directly with
    a pooled aiohttp session. Call :meth:`close` when finished with async
    queries to release that session.
    """

    def __init__(
        self, project_id: str, location: str, bucket: str, index_name: str | None = None
    ):
        """
        Initializes the GoogleCloudVectorSearch client.

        Args:
            project_id: The Google Cloud project ID.
            location: The Google Cloud location (e.g., 'us-central1').
            bucket: The GCS bucket to use for file storage.
            index_name: The name of the index; required only by the query
                methods, which use it to locate result content in GCS.
        """
        aiplatform.init(project=project_id, location=location)
        self.project_id = project_id
        self.location = location
        self.storage = GoogleCloudFileStorage(bucket=bucket)
        self.index_name = index_name
        # Set by create_index/update_index; deploy_index requires it. Was
        # previously never initialized, so deploy_index raised AttributeError.
        self.index = None
        self._credentials = None
        self._aio_session: aiohttp.ClientSession | None = None
        self._async_token: Token | None = None

    def _get_auth_headers(self) -> dict:
        """Return sync REST headers, lazily creating/refreshing ADC credentials."""
        if self._credentials is None:
            self._credentials, _ = google.auth.default(
                scopes=["https://www.googleapis.com/auth/cloud-platform"]
            )
        if not self._credentials.token or self._credentials.expired:
            self._credentials.refresh(google.auth.transport.requests.Request())
        return {
            "Authorization": f"Bearer {self._credentials.token}",
            "Content-Type": "application/json",
        }

    async def _async_get_auth_headers(self) -> dict:
        """Return async REST headers, lazily creating the token helper."""
        if self._async_token is None:
            self._async_token = Token(
                session=self._get_aio_session(),
                scopes=["https://www.googleapis.com/auth/cloud-platform"],
            )
        access_token = await self._async_token.get()
        return {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json",
        }

    def _get_aio_session(self) -> aiohttp.ClientSession:
        """Return the shared aiohttp session, (re)creating it if missing or closed."""
        if self._aio_session is None or self._aio_session.closed:
            connector = aiohttp.TCPConnector(limit=300, limit_per_host=50)
            timeout = aiohttp.ClientTimeout(total=60)
            self._aio_session = aiohttp.ClientSession(
                timeout=timeout, connector=connector
            )
        return self._aio_session

    async def close(self) -> None:
        """
        Close the shared aiohttp session used by async queries.

        The session was previously never closed, leaking connections and
        producing 'Unclosed client session' warnings at interpreter shutdown.
        Safe to call multiple times; a later async query recreates the session.
        """
        self._async_token = None
        if self._aio_session is not None and not self._aio_session.closed:
            await self._aio_session.close()
        self._aio_session = None

    def _content_path(self, datapoint_id: str) -> str:
        """
        Return the GCS object path holding a datapoint's markdown content.

        Raises:
            ValueError: if the client was constructed without ``index_name``
                (previously this surfaced as an opaque TypeError on None).
        """
        if self.index_name is None:
            raise ValueError(
                "index_name is required to resolve result content paths; "
                "pass index_name= when constructing GoogleCloudVectorSearch."
            )
        return f"{self.index_name}/contents/{datapoint_id}.md"

    def create_index(
        self,
        name: str,
        content_path: str,
        dimensions: int,
        approximate_neighbors_count: int = 150,
        distance_measure_type: str = "DOT_PRODUCT_DISTANCE",
        **kwargs,
    ) -> None:
        """
        Creates a new Vertex AI Vector Search index.

        The created index is stored on ``self.index`` for a subsequent
        :meth:`deploy_index` call.

        Args:
            name: The display name for the new index.
            content_path: The GCS URI to the JSON file containing the embeddings.
            dimensions: The number of dimensions in the embedding vectors.
            approximate_neighbors_count: The number of neighbors to find for each vector.
            distance_measure_type: The distance measure to use (e.g., 'DOT_PRODUCT_DISTANCE').
            **kwargs: Accepted for interface compatibility; currently unused.
        """
        index = aiplatform.MatchingEngineIndex.create_tree_ah_index(
            display_name=name,
            contents_delta_uri=content_path,
            dimensions=dimensions,
            approximate_neighbors_count=approximate_neighbors_count,
            distance_measure_type=distance_measure_type,
            leaf_node_embedding_count=1000,
            leaf_nodes_to_search_percent=10,
        )
        self.index = index

    def update_index(self, index_name: str, content_path: str, **kwargs) -> None:
        """
        Updates an existing Vertex AI Vector Search index.

        The updated index is stored on ``self.index`` for a subsequent
        :meth:`deploy_index` call.

        Args:
            index_name: The resource name of the index to update.
            content_path: The GCS URI to the JSON file containing the new embeddings.
            **kwargs: Accepted for interface compatibility; currently unused.
        """
        index = aiplatform.MatchingEngineIndex(index_name=index_name)
        index.update_embeddings(
            contents_delta_uri=content_path,
        )
        self.index = index

    def deploy_index(
        self, index_name: str, machine_type: str = "e2-standard-2"
    ) -> None:
        """
        Deploys a Vertex AI Vector Search index to a new public endpoint.

        Args:
            index_name: Display name used for the endpoint and deployed-index ID.
            machine_type: The type of machine to use for the endpoint.

        Raises:
            ValueError: if no index has been created/updated on this instance
                (previously an uninitialized-attribute AttributeError).
        """
        if self.index is None:
            raise ValueError(
                "No index to deploy; call create_index or update_index first."
            )
        index_endpoint = aiplatform.MatchingEngineIndexEndpoint.create(
            display_name=f"{index_name}-endpoint",
            public_endpoint_enabled=True,
        )
        index_endpoint.deploy_index(
            index=self.index,
            # Deployed-index IDs must be identifier-like; suffix with a UUID so
            # redeployments of the same index never collide.
            deployed_index_id=f"{index_name.replace('-', '_')}_deployed_{uuid4().hex}",
            machine_type=machine_type,
        )
        self.index_endpoint = index_endpoint

    def load_index_endpoint(self, endpoint_name: str) -> None:
        """
        Loads an existing Vertex AI Vector Search index endpoint.

        Args:
            endpoint_name: The resource name of the index endpoint.

        Raises:
            ValueError: if the endpoint has no public domain (async REST
                queries require public access).
        """
        self.index_endpoint = aiplatform.MatchingEngineIndexEndpoint(endpoint_name)
        if not self.index_endpoint.public_endpoint_domain_name:
            raise ValueError(
                "The index endpoint does not have a public endpoint. "
                "Please ensure that the endpoint is configured for public access."
            )

    def run_query(
        self, deployed_index_id: str, query: List[float], limit: int
    ) -> List[SearchResult]:
        """
        Runs a similarity search query against the deployed index.

        NOTE(review): the base class names the first parameter ``index``;
        keyword callers written against the base signature will not match.

        Args:
            deployed_index_id: The ID of the deployed index.
            query: The embedding vector to use for the search query.
            limit: The maximum number of nearest neighbors to return.

        Returns:
            A list of dictionaries representing the matched items, with the
            content of each match loaded from GCS.
        """
        response = self.index_endpoint.find_neighbors(
            deployed_index_id=deployed_index_id, queries=[query], num_neighbors=limit
        )
        results = []
        # find_neighbors returns one neighbor list per query; we sent one query.
        for neighbor in response[0]:
            file_path = self._content_path(neighbor.id)
            content = self.storage.get_file_stream(file_path).read().decode("utf-8")
            results.append(
                {"id": neighbor.id, "distance": neighbor.distance, "content": content}
            )
        return results

    async def async_run_query(
        self, deployed_index_id: str, query: List[float], limit: int
    ) -> List[SearchResult]:
        """
        Runs a non-blocking similarity search query against the deployed index
        using the REST API directly with an async HTTP client.

        Args:
            deployed_index_id: The ID of the deployed index.
            query: The embedding vector to use for the search query.
            limit: The maximum number of nearest neighbors to return.

        Returns:
            A list of dictionaries representing the matched items, with the
            content of each match fetched from GCS concurrently.
        """
        domain = self.index_endpoint.public_endpoint_domain_name
        endpoint_id = self.index_endpoint.name.split("/")[-1]
        url = (
            f"https://{domain}/v1/projects/{self.project_id}"
            f"/locations/{self.location}"
            f"/indexEndpoints/{endpoint_id}:findNeighbors"
        )
        payload = {
            "deployed_index_id": deployed_index_id,
            "queries": [
                {
                    "datapoint": {"feature_vector": query},
                    "neighbor_count": limit,
                }
            ],
        }
        headers = await self._async_get_auth_headers()
        session = self._get_aio_session()
        async with session.post(url, json=payload, headers=headers) as response:
            response.raise_for_status()
            data = await response.json()
        neighbors = data.get("nearestNeighbors", [{}])[0].get("neighbors", [])
        # Fetch all content blobs concurrently rather than one at a time.
        content_tasks = []
        for neighbor in neighbors:
            datapoint_id = neighbor["datapoint"]["datapointId"]
            file_path = self._content_path(datapoint_id)
            content_tasks.append(self.storage.async_get_file_stream(file_path))
        file_streams = await asyncio.gather(*content_tasks)
        results: List[SearchResult] = []
        for neighbor, stream in zip(neighbors, file_streams):
            results.append(
                {
                    "id": neighbor["datapoint"]["datapointId"],
                    "distance": neighbor["distance"],
                    "content": stream.read().decode("utf-8"),
                }
            )
        return results

    def delete_index(self, index_name: str) -> None:
        """
        Deletes a Vertex AI Vector Search index.

        Args:
            index_name: The resource name of the index.
        """
        index = aiplatform.MatchingEngineIndex(index_name)
        index.delete()

    def delete_index_endpoint(self, index_endpoint_name: str) -> None:
        """
        Deletes a Vertex AI Vector Search index endpoint.

        All deployed indexes are undeployed first, then the endpoint itself is
        force-deleted.

        Args:
            index_endpoint_name: The resource name of the index endpoint.
        """
        index_endpoint = aiplatform.MatchingEngineIndexEndpoint(index_endpoint_name)
        index_endpoint.undeploy_all()
        index_endpoint.delete(force=True)