Weaviate and Qdrant are powerful open-source vector databases with different strengths. This lesson covers both, helping you choose the right one for your use case.
Weaviate is an open-source vector database with a GraphQL API, built-in vectorization, and multimodal support.
# Install the client:
#   pip install weaviate-client
# Run the server with Docker:
#   docker run -p 8080:8080 -p 50051:50051 semitechnologies/weaviate:latest

import weaviate
from weaviate.classes.config import Configure, Property, DataType

# Connect to a local Weaviate instance (defaults to http://localhost:8080).
client = weaviate.connect_to_local()

# Or connect to a managed Weaviate Cloud cluster instead.
client = weaviate.connect_to_weaviate_cloud(
    cluster_url="your-cluster-url",
    auth_credentials=weaviate.auth.AuthApiKey("your-api-key"),
)

# Create a collection; text2vec_openai makes Weaviate embed objects
# server-side on insert, so no client-side embedding step is needed.
client.collections.create(
    name="Document",
    vectorizer_config=Configure.Vectorizer.text2vec_openai(),
    properties=[
        Property(name="content", data_type=DataType.TEXT),
        Property(name="source", data_type=DataType.TEXT),
        Property(name="category", data_type=DataType.TEXT),
    ],
)
# Get a handle to the collection.
documents = client.collections.get("Document")

# Insert a single object; the configured vectorizer embeds it automatically.
documents.data.insert({
    "content": "Machine learning is a subset of artificial intelligence...",
    "source": "documentation",
    "category": "ml",
})

# Batch insert: the dynamic batcher sizes and flushes batches automatically.
with documents.batch.dynamic() as batch:
    for doc in docs_list:
        batch.add_object(properties=doc)

# Insert with a pre-computed vector, bypassing the server-side vectorizer.
documents.data.insert(
    properties={"content": "...", "source": "..."},
    vector=[0.1, 0.2, ...],  # your own embedding
)

# Imports used by the query examples that follow.
from weaviate.classes.query import Filter, MetadataQuery
# Vector (semantic) search over the collection.
response = documents.query.near_text(
    query="What is deep learning?",
    limit=5,
    return_metadata=MetadataQuery(distance=True),  # include distance per hit
)
for obj in response.objects:
    print(f"Distance: {obj.metadata.distance}")
    print(f"Content: {obj.properties['content'][:100]}...")

# Same search, restricted by a metadata filter.
response = documents.query.near_text(
    query="machine learning",
    limit=5,
    filters=Filter.by_property("category").equal("ml"),
)

# Hybrid search blends vector similarity with BM25 keyword scores.
response = documents.query.hybrid(
    query="deep learning neural networks",
    limit=5,
    alpha=0.5,  # 0 = pure BM25, 1 = pure vector
)
client.collections.create(
name="Document",
vectorizer_config=Configure.Vectorizer.text2vec_openai(),
generative_config=Configure.Generative.openai()
)
# Query with generation
documents = client.collections.get("Document")
response = documents.generate.near_text(
query="What is machine learning?",
limit=3,
grouped_task="Summarize these documents in one paragraph."
)
print(response.generated) # LLM-generated summaryQdrant is a high-performance vector database written in Rust, known for speed and rich filtering capabilities.
# Install the client:
#   pip install qdrant-client
# Run the server with Docker:
#   docker run -p 6333:6333 qdrant/qdrant

from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct

# Connect to a local Qdrant instance.
client = QdrantClient("localhost", port=6333)

# Or use a managed Qdrant Cloud cluster instead.
client = QdrantClient(
    url="your-cluster-url",
    api_key="your-api-key",
)

# Create a collection. Qdrant does not embed for you, so the vector size
# must match the model you bring (1536 = OpenAI text-embedding-3-small).
client.create_collection(
    collection_name="documents",
    vectors_config=VectorParams(
        size=1536,
        distance=Distance.COSINE,
    ),
)

# Embeddings are produced client-side, e.g. with the OpenAI SDK.
from openai import OpenAI
openai = OpenAI()

def get_embedding(text: str) -> list[float]:
    """Embed *text* with OpenAI's text-embedding-3-small (1536 dims)."""
    response = openai.embeddings.create(
        model="text-embedding-3-small",
        input=text,
    )
    return response.data[0].embedding

# Insert (upsert) a single point: id + vector + arbitrary JSON payload.
client.upsert(
    collection_name="documents",
    points=[
        PointStruct(
            id=1,
            vector=get_embedding("Machine learning is..."),
            payload={
                "content": "Machine learning is...",
                "source": "docs",
                "category": "ml",
            },
        )
    ],
)

# Batch insert: build all points first, then upsert them in one call.
points = [
    PointStruct(
        id=i,
        vector=get_embedding(doc["text"]),
        payload=doc,
    )
    for i, doc in enumerate(documents)
]
client.upsert(
    collection_name="documents",
    points=points,
    wait=True,  # block until the points are indexed
)

# Imports used by the search examples that follow.
from qdrant_client.models import Filter, FieldCondition, MatchValue, Range
from qdrant_client.models import DatetimeRange

# Basic nearest-neighbour search.
results = client.search(
    collection_name="documents",
    query_vector=get_embedding("What is deep learning?"),
    limit=5,
)
for result in results:
    print(f"Score: {result.score:.4f}")
    print(f"Content: {result.payload['content'][:100]}...")

# Search constrained by a payload filter.
results = client.search(
    collection_name="documents",
    query_vector=get_embedding("machine learning"),
    query_filter=Filter(
        must=[
            FieldCondition(
                key="category",
                match=MatchValue(value="ml"),
            )
        ]
    ),
    limit=5,
)

# Complex filters: every `must` condition is required; at least one
# `should` condition should match.
# NOTE: datetime fields use DatetimeRange — plain Range takes numeric bounds.
results = client.search(
    collection_name="documents",
    query_vector=query_embedding,
    query_filter=Filter(
        must=[
            FieldCondition(key="category", match=MatchValue(value="ml")),
            FieldCondition(key="date", range=DatetimeRange(gte="2024-01-01")),
        ],
        should=[
            FieldCondition(key="source", match=MatchValue(value="official")),
        ],
    ),
    limit=10,
)

from qdrant_client.models import PayloadSchemaType
# Payload indexes let Qdrant evaluate filters quickly instead of scanning
# every point's payload.
client.create_payload_index(
    collection_name="documents",
    field_name="category",
    field_schema=PayloadSchemaType.KEYWORD,  # exact-match string field
)
client.create_payload_index(
    collection_name="documents",
    field_name="date",
    field_schema=PayloadSchemaType.DATETIME,  # enables datetime range filters
)

from qdrant_client.models import ScalarQuantization, ScalarQuantizationConfig
# Create collection with scalar quantization
client.create_collection(
collection_name="documents_quantized",
vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
quantization_config=ScalarQuantization(
scalar=ScalarQuantizationConfig(
type="int8",
quantile=0.99,
always_ram=True
)
)
)
# Reduces memory by ~4x with minimal quality loss| Feature | Weaviate | Qdrant |
|---|---|---|
| Language | Go | Rust |
| API | GraphQL + REST | REST + gRPC |
| Built-in Vectorizers | ✅ Yes (many) | ❌ No (bring your own) |
| Generative Search | ✅ Built-in RAG | ❌ External |
| Multimodal | ✅ Native support | ✅ Via multi-vector |
| Filtering Performance | Good | Excellent (payload indices) |
| Quantization | ✅ Product quantization | ✅ Scalar + Binary |
| Best For | Full-featured, multimodal | Performance, complex filters |
In the next lesson, we'll explore ChromaDB and Milvus - other popular options in the vector database ecosystem.