turbopuffer supports vector search with filtering. Vectors are incrementally indexed in a SPANN vector index for performant search. Writes appear in search results immediately.
The vector index is automatically tuned for 90-100% recall ("accuracy"). We automatically monitor recall for production queries. You can use the recall endpoint to test it yourself.
# $ pip install turbopuffer[fast]
import turbopuffer as tpuf
import os
import uuid
# Create an API token in the dashboard (https://turbopuffer.com/dashboard)
# and expose it through the TURBOPUFFER_API_KEY environment variable.
tpuf.api_key = os.environ.get("TURBOPUFFER_API_KEY")
# Region endpoint to talk to; see https://turbopuffer.com/docs/regions
tpuf.api_base_url = "https://gcp-us-central1.turbopuffer.com"
# Create an embedding with OpenAI, could be {Cohere, Voyage, Mixed Bread, ...}
# Requires OPENAI_API_KEY to be set (https://platform.openai.com/settings/organization/api-keys)
def openai_or_rand_vector(text: str) -> list[float]:
    """Return an embedding for ``text``, or a random vector as a fallback.

    When ``OPENAI_API_KEY`` is set and the ``openai`` package is importable,
    the text is embedded with the ``text-embedding-3-small`` model. Otherwise
    a 2-dimensional random vector is returned so the example still runs.

    Args:
        text: The text to embed.

    Returns:
        The embedding as a list of floats.
    """
    # Imported locally so the snippet only needs `random` on the fallback path.
    import random

    def _random_vector() -> list[float]:
        # Draw each component independently. Repeating a single random value
        # (``[x] * 2``) makes every vector point in the same direction, which
        # collapses all cosine distances to zero.
        return [random.random() for _ in range(2)]

    if not os.getenv("OPENAI_API_KEY"):
        print("OPENAI_API_KEY not set, using random vectors")
        return _random_vector()

    try:
        import openai
    except ImportError:
        # The openai package is optional for this example.
        return _random_vector()

    response = openai.embeddings.create(model="text-embedding-3-small", input=text)
    return response.data[0].embedding
# Each run writes to a freshly named namespace.
ns = tpuf.Namespace(f'vector-py-{uuid.uuid4()}')

# Basic vector search example: index three captions, then search for "feline".
captions = [
    'A cat sleeping on a windowsill',
    'A playful kitten chasing a toy',
    'An airplane flying through clouds',
]
ns.upsert(
    ids=[1, 2, 3],
    # One embedding per caption, in the same order as the ids.
    vectors=[openai_or_rand_vector(caption) for caption in captions],
    attributes={
        'text': captions,
        'category': ['animal', 'animal', 'vehicle'],
    },
    distance_metric='cosine_distance',
)

query_vector = openai_or_rand_vector("feline")
results = ns.query(
    vector=query_vector,
    top_k=2,
    distance_metric='cosine_distance',
    include_attributes=['text'],
)
# Returns cat and kitten documents, sorted by vector similarity
print(results)
# Example of vector search with filters
ns = tpuf.Namespace('vector-py-2')
ns.upsert(
    ids=[1, 2, 3, 4],
    vectors=[
        openai_or_rand_vector("A shiny red sports car"),
        openai_or_rand_vector("A sleek blue sedan"),
        openai_or_rand_vector("A large red delivery truck"),
        openai_or_rand_vector("A blue pickup truck"),
    ],
    # Attribute columns are positional: entry i belongs to ids[i].
    attributes={
        'description': [
            'A shiny red sports car',
            'A sleek blue sedan',
            'A large red delivery truck',
            'A blue pickup truck',
        ],
        # Colors must agree with the descriptions above (document 3 is red).
        'color': ['red', 'blue', 'red', 'blue'],
        'type': ['car', 'car', 'truck', 'truck'],
        'price': [50000, 35000, 80000, 45000],
    },
    distance_metric='cosine_distance',
)
results = ns.query(
    vector=openai_or_rand_vector("car"),  # Embedding similar to "car"
    top_k=10,
    distance_metric='cosine_distance',
    # Complex filter combining multiple conditions, see https://turbopuffer.com/docs/query for all options
    # Only documents with price < 60000 AND color == 'blue' are candidates,
    # i.e. the blue sedan (id 2) and the blue pickup truck (id 4).
    filters=['And', [
        ['price', 'Lt', 60000],
        ['color', 'Eq', 'blue'],
    ]],
    include_attributes=['description', 'price'],
)
print(results)  # car, then truck