# LangBot/src/langbot/pkg/vector/vdb.py

from __future__ import annotations
import abc
import enum
from typing import Any, Dict
import numpy as np


class SearchType(str, enum.Enum):
    """Enumeration of the search modes a vector database may offer.

    Every member also subclasses ``str``, so a member compares equal to
    its plain string value (e.g. ``SearchType.VECTOR == 'vector'``).
    """

    # Search driven by the query embedding.
    VECTOR = 'vector'
    # Search driven by the raw query text.
    FULL_TEXT = 'full_text'
    # Search that uses both the query embedding and the query text.
    HYBRID = 'hybrid'


class VectorDatabase(abc.ABC):
    """Abstract interface that every vector database backend implements.

    Concrete backends must provide all methods marked abstract below.
    ``supported_search_types`` and ``list_by_filter`` ship with default
    implementations that a backend may override.
    """

    @classmethod
    def supported_search_types(cls) -> list[SearchType]:
        """Report which search types this backend can serve.

        The base implementation advertises vector search only. Backends
        capable of full-text or hybrid search should override this.
        """
        return [SearchType.VECTOR]

    @abc.abstractmethod
    async def add_embeddings(
        self,
        collection: str,
        ids: list[str],
        embeddings_list: list[list[float]],
        metadatas: list[dict[str, Any]],
        documents: list[str] | None = None,
    ) -> None:
        """Insert vectors into a collection.

        Args:
            collection: Name of the target collection.
            ids: Unique identifier for each vector.
            embeddings_list: The embedding vectors to store.
            metadatas: Metadata dicts accompanying the vectors.
            documents: Raw text documents (optional). Backends that
                support full-text or hybrid search require them for
                those modes.
        """
        ...

    @abc.abstractmethod
    async def search(
        self,
        collection: str,
        query_embedding: np.ndarray,
        k: int = 5,
        search_type: str = 'vector',
        query_text: str = '',
        filter: dict[str, Any] | None = None,
        vector_weight: float | None = None,
    ) -> Dict[str, Any]:
        """Find the entries of a collection most similar to a query.

        Args:
            collection: Name of the collection to search.
            query_embedding: Query vector used for similarity search.
            k: How many results to return.
            search_type: Either 'vector', 'full_text' or 'hybrid'.
            query_text: Raw query text; used by the full_text and hybrid
                search types.
            filter: Optional metadata filter written in Chroma-style
                ``where`` syntax.  Several top-level keys are combined
                with AND.  Supported operators are ``$eq``, ``$ne``,
                ``$gt``, ``$gte``, ``$lt``, ``$lte``, ``$in`` and
                ``$nin``.  Examples::

                    {"file_id": "abc"}
                    {"created_at": {"$gte": 1700000000}}
                    {"file_type": {"$in": ["pdf", "docx"]}}
            vector_weight: Weight (0.0–1.0) given to vector search in
                hybrid mode.  ``None`` selects equal weights, which
                keeps the behavior backward compatible.
        """
        ...

    @abc.abstractmethod
    async def delete_by_file_id(self, collection: str, file_id: str) -> None:
        """Remove the vectors associated with ``file_id`` from a collection."""
        ...

    @abc.abstractmethod
    async def delete_by_filter(self, collection: str, filter: dict[str, Any]) -> int:
        """Remove every vector that matches a metadata filter.

        Args:
            collection: Name of the collection to delete from.
            filter: Metadata filter dict in the canonical format
                described on ``search``.

        Returns:
            The number of vectors deleted.  This is best-effort: a
            backend unable to report an exact count may return 0.
        """
        ...

    async def list_by_filter(
        self,
        collection: str,
        filter: dict[str, Any] | None = None,
        limit: int = 20,
        offset: int = 0,
    ) -> tuple[list[dict[str, Any]], int]:
        """Page through the vectors that match a metadata filter.

        This base implementation does no lookup: it always yields an
        empty page together with an unknown total.

        Args:
            collection: Name of the collection to list from.
            filter: Optional metadata filter dict in the canonical format.
            limit: Upper bound on the number of items returned.
            offset: How many matching items to skip first.

        Returns:
            An ``(items, total)`` tuple.  ``items`` is a list of dicts
            carrying the keys 'id', 'document' and 'metadata'; ``total``
            is the best-effort count of all matching vectors, or -1 when
            that count is unknown.
        """
        no_items: list[dict[str, Any]] = []
        unknown_total = -1  # -1 signals "total not known"
        return no_items, unknown_total

    @abc.abstractmethod
    async def get_or_create_collection(self, collection: str):
        """Return the named collection, creating it first if necessary."""
        ...

    @abc.abstractmethod
    async def delete_collection(self, collection: str):
        """Delete the named collection."""
        ...