# Get a quick feel for the API with some examples.
# First, install the python package
# $ pip install turbopuffer[fast]
import turbopuffer as tpuf
import os
import uuid
# Create an API token in the dashboard (https://turbopuffer.com/dashboard)
# and expose it through the TURBOPUFFER_API_KEY environment variable
tpuf.api_key = os.environ.get("TURBOPUFFER_API_KEY")
# Select the region to send requests to, see https://turbopuffer.com/docs/regions
tpuf.api_base_url = "https://gcp-us-central1.turbopuffer.com"
# The random UUID suffix gives every run of this script its own namespace name
ns = tpuf.Namespace("namespace-a-py-" + str(uuid.uuid4()))
# Create an embedding with OpenAI, could be {Cohere, Voyage, Mixed Bread, ...}
# Requires OPENAI_API_KEY to be set (https://platform.openai.com/settings/organization/api-keys)
def openai_or_rand_vector(text: str) -> list[float]:
    """Return an embedding for *text*, or a random 2-d vector as a fallback.

    When the ``OPENAI_API_KEY`` environment variable is set and the ``openai``
    package can be imported, the embedding comes from OpenAI's
    ``text-embedding-3-small`` model. Otherwise a two-component vector of
    independent random floats is returned so the example still runs.

    Args:
        text: The text to embed. Ignored when the random fallback is used.

    Returns:
        The embedding as a list of floats.
    """
    import random

    def random_vector() -> list[float]:
        # Draw each component separately. Repeating a single sample
        # (``[x] * 2``) would put every fallback vector on the same line
        # through the origin, so the cosine distance between any two of them
        # would be ~0 and the example queries could not rank anything.
        return [random.random() for _ in range(2)]

    if not os.getenv("OPENAI_API_KEY"):
        print("OPENAI_API_KEY not set, using random vectors")
        return random_vector()

    # Only the import is guarded: a missing `openai` package falls back to
    # random vectors, while real API errors still surface to the caller.
    try:
        import openai
    except ImportError:
        return random_vector()

    response = openai.embeddings.create(model="text-embedding-3-small", input=text)
    return response.data[0].embedding
# Write two documents, each with an ID, a vector and a set of attributes
ns.upsert(
    ids=[1, 2],
    vectors=[
        openai_or_rand_vector(doc_text)
        for doc_text in ("walrus narwhal", "elephant walrus rhino")
    ],
    attributes={
        "name": ["foo", "foo"],
        "public": [1, 0],
        "text": ["walrus narwhal", "elephant walrus rhino"],
    },
    distance_metric="cosine_distance",
    schema={
        # Only "text" is configured explicitly, to enable FTS/BM25 on it;
        # other attributes have inferred types (name: str, public: int).
        # More schema & FTS options: https://turbopuffer.com/docs/schema
        "text": {"type": "string", "full_text_search": True},
    },
)
# Nearest-neighbor query, restricted to documents that match both filters
nearest = ns.query(
    vector=openai_or_rand_vector("walrus narwhal"),
    top_k=10,
    distance_metric="cosine_distance",
    filters=[
        "And",
        [
            ["name", "Eq", "foo"],
            ["public", "Eq", 1],
        ],
    ],
    include_attributes=["name"],  # only return the "name" attribute
    include_vectors=False,
)
print(nearest)
# [VectorRow(id=1, vector=None, attributes={'name': 'foo'}, dist=0.009067952632904053)]
# Rank documents by BM25 full-text relevance of the "text" attribute.
# To combine FTS with vector search, see https://turbopuffer.com/docs/hybrid-search
fts_hits = ns.query(
    top_k=10,
    distance_metric="cosine_distance",
    filters=["name", "Eq", "foo"],
    rank_by=["text", "BM25", "quick walrus"],
)
print(fts_hits)
# [VectorRow(id=1, vector=None, attributes={'name': 'foo'}, dist=0.19)]
# [VectorRow(id=2, vector=None, attributes={'name': 'foo'}, dist=0.168)]
# Passing new data for an ID that already exists updates that vector
ns.upsert(
    ids=[1, 2, 3],
    vectors=[openai_or_rand_vector("foo") for _ in range(3)],
    attributes={"name": ["foo"] * 3, "public": [1] * 3},
    distance_metric="cosine_distance",
)
# Deletion is by ID
ns.delete([1, 3])