Hybrid Search

┌─{search.py,search.ts}────────┐
│ ┌──────────┐                 │
│ │   User   │                 │
│ │  Query   │                 │
│ └─────┬────┘                 │
│       ▼                      │
│ ┌─turbopuffer queries──┐     │
│ │ ┌────────────────┐   │     │
│ ├▶│ Vector Query 1 │───┤     │
│ │ └────────────────┘   │     │
│ │ ┌────────────────┐   │     │
│ ├▶│ Vector Query 2 │───┤     │
│ │ └────────────────┘   │     │
│ │ ┌────────────────┐   │     │
│ ├▶│  Text Query 1  │───┤     │
│ │ └────────────────┘   │     │
│ └──────────┬───────────┘     │
│            ▼                 │
│    ┌────────────┐            │
│    │ Rank Fuse  │            │
│    └─────┬──────┘            │
│          ▼                   │
│    ┌────────────┐            │
│    │  Re-Rank   │            │
│    └────────────┘            │
└──────────────────────────────┘

To improve search quality, multiple strategies can be used together. This is commonly referred to as hybrid search.

turbopuffer supports vector search and BM25 full-text search. Combining them produces semantically relevant search results (vectors), as well as results matching specific words or strings (e.g. product SKUs or email addresses, where exact keywords should be weighted highly).

Keep search logic in {search.py, search.ts}. Use turbopuffer for initial retrieval to narrow millions of results to dozens for rank fusion and re-ranking.

To improve search results further, we suggest:

Choose an embedding model for the vector side of hybrid search. Pick one from the dropdown in the code sample below, or use random vectors to get started (don't use random vectors in production or for benchmarking).

# $ pip install turbopuffer sentence-transformers
import os
import uuid
from typing import List

import turbopuffer
from sentence_transformers import SentenceTransformer
from turbopuffer.types import ID, Row

# API keys are created at https://turbopuffer.com/dashboard; choose the best
# region for your workload from https://turbopuffer.com/docs/regions.
tpuf = turbopuffer.Turbopuffer(
    region="gcp-us-central1",
    api_key=os.environ.get("TURBOPUFFER_API_KEY"),
)

# Use TURBOPUFFER_NAMESPACE when it is set; otherwise fall back to a name
# ending in eight random hex characters.
namespace = os.environ.get(
    "TURBOPUFFER_NAMESPACE",
    f"hybrid-example-{uuid.uuid4().hex[:8]}",
)
ns = tpuf.namespace(namespace)

# Embeddings are computed locally with a BGE model, so no API key is needed.
# The model (~130 MB) is downloaded the first time the script runs.
bge = SentenceTransformer("BAAI/bge-small-en-v1.5")


def embed(text: str) -> List[float]:
    """Encode ``text`` with the BGE model and return the result as a list."""
    embedding = bge.encode(text)
    return embedding.tolist()

# Upsert documents with both FTS and vector search capabilities.
# Stored `content` of each document, keyed by document id.
contents = {
    1: "Muesli: A quick mix of raw oats, nuts and dried fruit served with cold milk",
    2: "Classic chia seed pudding is a cold breakfast that takes 5 minutes to prepare",
    3: "Overnight oats: Mix oats with milk, refrigerate overnight for a delicious chilled breakfast",
    4: "Hot oatmeal is a quick and healthy breakfast",
    5: "Breakfast sandwich: A little extra prep, but worth it on Sunday mornings!",
}

# Every document is embedded from its own content, except id 1, whose
# embedded text leaves out the word "quick" that its stored content has.
# NOTE(review): confirm whether that difference is intentional.
embedding_inputs = {
    **contents,
    1: "Muesli: A mix of raw oats, nuts and dried fruit served with cold milk",
}

ns.write(
    upsert_rows=[
        {
            "id": doc_id,
            "vector": embed(embedding_inputs[doc_id]),
            "content": content,
        }
        for doc_id, content in contents.items()
    ],
    distance_metric="cosine_distance",
    # Only `content` is declared here, with full-text search turned on.
    schema={"content": {"type": "string", "full_text_search": True}},
)

query = "quick breakfast like oatmeal but cold"
print("Ideal:", [1, 2, 3, 4, 5])

# Send both sub-queries in a single multi_query call: first the vector (ANN)
# query, then the BM25 full-text query. Each asks for up to 10 rows and
# includes the `content` attribute.
response = ns.multi_query(
    queries=[
        {"rank_by": rank_by, "limit": 10, "include_attributes": ["content"]}
        for rank_by in (
            ("vector", "ANN", embed(query)),
            ("content", "BM25", query),
        )
    ]
)

# results[0] and results[1] are read in the order the sub-queries were listed.
vector_result = response.results[0].rows
fts_result = response.results[1].rows
print("Vector:", [row.id for row in vector_result])
print("FTS:", [row.id for row in fts_result])

def reciprocal_rank_fusion(result_lists, k=60):
    """Fuse several ranked result lists with Reciprocal Rank Fusion (RRF).

    For every list an item appears in, it adds ``1 / (k + rank)`` to the score
    of its document, where ``rank`` starts at 1 for the first item of a list.

    Args:
        result_lists: Iterable of ranked sequences whose items expose an
            ``id`` attribute.
        k: Constant added to each rank before taking the reciprocal.

    Returns:
        A list with one item per distinct ``id``, ordered by descending fused
        score. When an id occurs in several lists, the item from the last
        list containing it is the one returned. The ``$dist`` attribute of
        each returned item is overwritten with its fused score, so the input
        items are mutated in place.
    """
    scores = {}
    rows_by_id = {}
    for results in result_lists:
        for rank, item in enumerate(results, start=1):
            scores[item.id] = scores.get(item.id, 0) + 1.0 / (k + rank)
            rows_by_id[item.id] = item

    fused = []
    # sorted() is stable, so documents with equal scores keep first-seen order.
    for doc_id, score in sorted(scores.items(), key=lambda pair: pair[1], reverse=True):
        row = rows_by_id[doc_id]
        # "$dist" is not a valid Python identifier, hence setattr().
        setattr(row, "$dist", score)
        fused.append(row)
    return fused

# Combine the vector and full-text rankings into one fused ranking.
fused_results = reciprocal_rank_fusion([vector_result, fts_result])
print("Fused:", [row.id for row in fused_results])

def cohere_rerank_or_unranked(results, query, k=None):
    """Re-rank ``results`` against ``query`` with Cohere when it is available.

    Falls back to returning ``results`` unchanged, after printing a warning,
    when the ``COHERE_API_KEY`` environment variable is unset or empty, or
    when the ``cohere`` client cannot be imported or constructed.

    Args:
        results: List of rows exposing a ``content`` attribute.
        query: Query text the rows are re-ranked against.
        k: Maximum number of rows to return; ``None`` (or 0) means all rows.

    Returns:
        The rows in the order returned by Cohere, each with its ``$dist``
        attribute overwritten by the Cohere relevance score, or the original
        ``results`` object when re-ranking is skipped.
    """
    api_key = os.getenv("COHERE_API_KEY")
    if not api_key:
        print("Warning: COHERE_API_KEY not set (https://dashboard.cohere.com/api-keys), returning unranked results")
        return results

    # Guard only the import and client construction, so that an error raised
    # while re-ranking is not misreported as a missing package.
    try:
        import cohere

        co = cohere.Client(api_key)
    except (ImportError, AttributeError):
        print("Warning: cohere package not installed (`pip install cohere`), returning unranked results")
        return results

    if not results:
        # Nothing to re-rank; avoid sending an empty document list.
        return results

    reranked = co.rerank(
        query=query,
        documents=[row.content for row in results],
        top_n=k or len(results),
    ).results
    for hit in reranked:
        # Same setattr() approach as reciprocal_rank_fusion: "$dist" is not a
        # valid identifier, so it cannot be assigned with dot syntax.
        setattr(results[hit.index], "$dist", hit.relevance_score)
    return [results[hit.index] for hit in reranked]

# Last stage: pass the fused candidates and the query text to the re-ranker.
reranked_results = cohere_rerank_or_unranked(fused_results, query)
print("Reranked:", [row.id for row in reranked_results])