Pinecone is a fully managed, serverless vector database designed for production AI applications. This lesson covers Pinecone's architecture, features, and hands-on implementation.
# Install the SDK first:
#   pip install pinecone
from pinecone import Pinecone, ServerlessSpec

# Initialize client
pc = Pinecone(api_key="your-api-key")

# Create serverless index
pc.create_index(
    name="my-rag-index",
    dimension=1536,  # OpenAI embedding dimension (text-embedding-3-small)
    metric="cosine",
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1",
    ),
)

# Connect to index
index = pc.Index("my-rag-index")

# Check index stats
print(index.describe_index_stats())

from openai import OpenAI
openai = OpenAI()


def get_embedding(text: str) -> list[float]:
    """Embed *text* with OpenAI's text-embedding-3-small model."""
    result = openai.embeddings.create(
        input=text,
        model="text-embedding-3-small",
    )
    return result.data[0].embedding
# Prepare vectors with metadata
documents = [
    {"id": "doc1", "text": "Machine learning is a subset of AI..."},
    {"id": "doc2", "text": "Neural networks are inspired by the brain..."},
    {"id": "doc3", "text": "Deep learning uses multiple layers..."},
]

vectors = [
    {
        "id": doc["id"],
        "values": get_embedding(doc["text"]),
        # Store the raw text in metadata so search results can return it
        "metadata": {
            "text": doc["text"],
            "source": "documentation",
            "category": "ml",
        },
    }
    for doc in documents
]

# Upsert (insert or update)
index.upsert(vectors=vectors, namespace="docs")

# Batch upsert for large datasets
batch_size = 100
for i in range(0, len(vectors), batch_size):
    batch = vectors[i:i + batch_size]
    index.upsert(vectors=batch, namespace="docs")

# Basic query
query = "What is deep learning?"
query_embedding = get_embedding(query)

results = index.query(
    vector=query_embedding,
    top_k=5,
    namespace="docs",
    include_metadata=True,
)

# Each match carries a similarity score and the metadata stored at upsert time
for match in results.matches:
    print(f"Score: {match.score:.4f}")
    print(f"Text: {match.metadata['text'][:100]}...")
    print("---")

# Query with filters
results = index.query(
vector=query_embedding,
top_k=5,
namespace="docs",
include_metadata=True,
filter={
"category": {"$eq": "ml"},
"$or": [
{"source": {"$eq": "documentation"}},
{"date": {"$gte": "2024-01-01"}}
]
}
)
# Available filter operators:
# $eq, $ne - equals, not equals
# $gt, $gte, $lt, $lte - comparisons
# $in, $nin - in list, not in list
# $and, $or - logical operatorsNamespaces partition your index for multi-tenancy or logical separation.
# Different namespaces for different users/purposes
index.upsert(vectors=user1_docs, namespace="user-123")
index.upsert(vectors=user2_docs, namespace="user-456")
# Query only within a namespace
results = index.query(
vector=query_embedding,
top_k=5,
namespace="user-123" # Only searches this user's data
)
# Delete namespace
index.delete(delete_all=True, namespace="user-123")Pinecone supports hybrid search combining dense vectors with sparse (BM25-style) vectors.
from pinecone_text.sparse import BM25Encoder

# Initialize BM25 encoder
bm25 = BM25Encoder()
bm25.fit(corpus)  # Fit on your documents


def create_hybrid_vector(text: str) -> dict:
    """Return the dense values plus sparse (BM25) values for *text*."""
    dense_values = get_embedding(text)
    sparse_enc = bm25.encode_documents([text])[0]
    return {
        "values": dense_values,
        "sparse_values": {
            "indices": sparse_enc["indices"],
            "values": sparse_enc["values"],
        },
    }
# Upsert hybrid vectors
vectors = [
    {
        "id": doc["id"],
        **create_hybrid_vector(doc["text"]),
        "metadata": {"text": doc["text"]},
    }
    for doc in documents
]
index.upsert(vectors=vectors)

# Hybrid query: supply both the dense and the sparse representation
query_dense = get_embedding(query)
query_sparse = bm25.encode_queries([query])[0]

results = index.query(
    vector=query_dense,
    sparse_vector={
        "indices": query_sparse["indices"],
        "values": query_sparse["values"],
    },
    top_k=10,
    include_metadata=True,
)

# Fetch vectors by ID
# Fetch vectors by ID
fetched = index.fetch(ids=["doc1", "doc2"], namespace="docs")
print(fetched.vectors["doc1"].metadata)

# Update metadata only (without re-embedding)
index.update(id="doc1", set_metadata={"category": "updated_category"}, namespace="docs")

# Delete by ID
index.delete(ids=["doc1", "doc2"], namespace="docs")

# Delete every vector matching a metadata filter
index.delete(filter={"category": {"$eq": "old_category"}}, namespace="docs")

# Delete entire namespace
index.delete(delete_all=True, namespace="docs")


class PineconeRAG:
    """Minimal RAG pipeline backed by a Pinecone index and OpenAI models."""

    def __init__(self, index_name: str):
        # No api_key argument: presumably read from the environment — verify deployment config
        self.pc = Pinecone()
        self.index = self.pc.Index(index_name)
        self.openai = OpenAI()

    def embed(self, text: str) -> list[float]:
        """Return the embedding vector for *text*."""
        response = self.openai.embeddings.create(
            model="text-embedding-3-small",
            input=text,
        )
        return response.data[0].embedding

    def add_documents(self, documents: list[dict], namespace: str = ""):
        """Embed and upsert docs shaped {"id", "text", optional "metadata"}."""
        vectors = []
        for doc in documents:
            vectors.append({
                "id": doc["id"],
                "values": self.embed(doc["text"]),
                "metadata": {
                    "text": doc["text"],
                    **doc.get("metadata", {}),
                },
            })
        # Batch upsert to keep request sizes manageable
        for i in range(0, len(vectors), 100):
            self.index.upsert(
                vectors=vectors[i:i + 100],
                namespace=namespace,
            )

    def search(
        self,
        query: str,
        top_k: int = 5,
        filter: dict | None = None,
        namespace: str = "",
    ) -> list[dict]:
        """Return the top_k matches for *query* as plain dicts (id/score/text/metadata)."""
        results = self.index.query(
            vector=self.embed(query),
            top_k=top_k,
            filter=filter,
            namespace=namespace,
            include_metadata=True,
        )
        return [
            {
                "id": m.id,
                "score": m.score,
                "text": m.metadata.get("text", ""),
                "metadata": m.metadata,
            }
            for m in results.matches
        ]

    def query(self, question: str, namespace: str = "") -> str:
        """Answer *question* with gpt-4o, grounded in retrieved context."""
        # Retrieve relevant documents
        results = self.search(question, top_k=5, namespace=namespace)
        # Build context from the retrieved texts
        context = "\n---\n".join([r["text"] for r in results])
        # Generate answer
        response = self.openai.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": "Answer based on the provided context.",
                },
                {
                    "role": "user",
                    "content": f"Context:\n{context}\n\nQuestion: {question}",
                },
            ],
        )
        return response.choices[0].message.content
# Usage
rag = PineconeRAG("my-rag-index")
answer = rag.query("What is deep learning?")

import asyncio

from pinecone import Pinecone
async def async_search(queries: list[str]):
    """Run one Pinecone query per input string concurrently; return all result sets."""
    pc = Pinecone()
    index = pc.Index("my-index")

    # The Pinecone client is synchronous, so each query runs in a worker
    # thread via asyncio.to_thread and they are awaited together.
    # NOTE(review): embeddings are still computed serially before the tasks
    # are scheduled — could be threaded too if this becomes a bottleneck.
    tasks = []
    for query in queries:
        embedding = get_embedding(query)
        tasks.append(
            asyncio.to_thread(
                index.query,
                vector=embedding,
                top_k=5,
            )
        )

    results = await asyncio.gather(*tasks)
    return results

# List all indexes
indexes = pc.list_indexes()
print([idx.name for idx in indexes])
# Describe index
index_info = pc.describe_index("my-index")
print(f"Dimension: {index_info.dimension}")
print(f"Metric: {index_info.metric}")
print(f"Status: {index_info.status}")
# Get index stats
stats = index.describe_index_stats()
print(f"Total vectors: {stats.total_vector_count}")
print(f"Namespaces: {stats.namespaces}")
# Delete index
pc.delete_index("old-index")In the next lesson, we'll explore Weaviate and Qdrant - open-source alternatives with different strengths.