Logo

Vector Search Guide

turbopuffer supports vector search with filtering. Vectors are incrementally indexed in a SPANN vector index for performant search. Writes appear in search results immediately.

The vector index is automatically tuned for 90-100% recall ("accuracy"). We automatically monitor recall for production queries. You can also use the recall endpoint to measure recall on your own queries.

# $ pip install turbopuffer[fast]
import turbopuffer as tpuf
import os
import uuid
# Authenticate with an API token read from the environment.
# Tokens are created in the dashboard: https://turbopuffer.com/dashboard
tpuf.api_key = os.environ.get("TURBOPUFFER_API_KEY")
# Select the region to talk to; see https://turbopuffer.com/docs/regions
tpuf.api_base_url = "https://gcp-us-central1.turbopuffer.com"

# Create an embedding with OpenAI, could be {Cohere, Voyage, Mixed Bread, ...}
# Requires OPENAI_API_KEY to be set (https://platform.openai.com/settings/organization/api-keys)
def openai_or_rand_vector(text: str) -> list[float]:
    """Return an embedding for ``text``, falling back to a random vector.

    If ``OPENAI_API_KEY`` is set and the ``openai`` package is importable,
    the text is embedded with the ``text-embedding-3-small`` model.
    Otherwise a random 2-dimensional vector is returned so the example can
    still run end to end (the results are then not semantically meaningful).

    Args:
        text: The text to embed.

    Returns:
        The embedding as a list of floats, or a list of two independent
        random floats in ``[0.0, 1.0)`` when OpenAI is unavailable.
    """
    # Function-scope import: only the fallback path needs it.
    import random

    def _random_vector() -> list[float]:
        # Draw each component independently. Repeating a single value
        # (``[x] * 2``) would make every fallback vector point in the same
        # direction, so all cosine distances would be identical.
        return [random.random() for _ in range(2)]

    if not os.getenv("OPENAI_API_KEY"):
        print("OPENAI_API_KEY not set, using random vectors")
        return _random_vector()

    try:
        import openai
    except ImportError:
        # The key is set but the client library is not installed.
        return _random_vector()

    response = openai.embeddings.create(model="text-embedding-3-small", input=text)
    return response.data[0].embedding

# The namespace name carries a random UUID suffix.
ns = tpuf.Namespace(f'vector-py-{uuid.uuid4()}')

# Basic vector search example.
# Each document is a (text, category) pair; its vector is embedded from the text.
basic_docs = [
    ('A cat sleeping on a windowsill', 'animal'),
    ('A playful kitten chasing a toy', 'animal'),
    ('An airplane flying through clouds', 'vehicle'),
]
basic_texts = [text for text, _ in basic_docs]

ns.upsert(
    ids=list(range(1, len(basic_docs) + 1)),
    vectors=[openai_or_rand_vector(text) for text in basic_texts],
    attributes={
        'text': basic_texts,
        'category': [category for _, category in basic_docs],
    },
    distance_metric='cosine_distance',
)

# Embed the query text, then ask for the two nearest documents.
feline_vector = openai_or_rand_vector("feline")
results = ns.query(
    vector=feline_vector,
    top_k=2,
    distance_metric='cosine_distance',
    include_attributes=['text'],
)
# Returns cat and kitten documents, sorted by vector similarity
print(results)

# Example of vector search with filters
ns = tpuf.Namespace('vector-py-2')
ns.upsert(
    ids=[1, 2, 3, 4],
    vectors=[
        openai_or_rand_vector("A shiny red sports car"),
        openai_or_rand_vector("A sleek blue sedan"),
        openai_or_rand_vector("A large red delivery truck"),
        openai_or_rand_vector("A blue pickup truck")
    ],
    # Attributes are given column-wise: position i in each list belongs to ids[i].
    attributes={
        'description': [
            'A shiny red sports car',
            'A sleek blue sedan',
            'A large red delivery truck',
            'A blue pickup truck'
        ],
        # Colors match the descriptions above (document 3 is the red truck).
        'color': ['red', 'blue', 'red', 'blue'],
        'type': ['car', 'car', 'truck', 'truck'],
        'price': [50000, 35000, 80000, 45000]
    },
    distance_metric='cosine_distance'
)

results = ns.query(
    vector=openai_or_rand_vector("car"),  # Embedding similar to "car"
    top_k=10,
    distance_metric='cosine_distance',
    # Complex filter combining multiple conditions, see https://turbopuffer.com/docs/query for all options.
    # Both conditions must hold: price below 60000 AND color equal to 'blue'.
    filters=['And', [
        ['price', 'Lt', 60000],
        ['color', 'Eq', 'blue']
    ]],
    include_attributes=['description', 'price']
)
# Only the blue sedan (id 2) and the blue pickup truck (id 4) pass the filter.
print(results) # car, then truck
© 2025 turbopuffer Inc.
Privacy Policy | Terms of service