Hybrid search combines the precision of keyword search with the semantic understanding of vector search, delivering better results than either approach alone.
Neither pure keyword nor pure semantic search is perfect:
| Search Type | Strengths | Weaknesses |
|---|---|---|
| Keyword (BM25) | Exact matches, rare terms, IDs | Misses synonyms, no context |
| Semantic (Vector) | Synonyms, concepts, context | May miss exact keywords, IDs |
| Hybrid | Best of both | More complexity |
Example query: "Python error ERR-404" — keyword search matches the exact code ERR-404, while semantic search understands the surrounding intent (a Python error); hybrid search captures both.
from collections import defaultdict
def reciprocal_rank_fusion(
    results_lists: list[list[dict]],
    k: int = 60
) -> list[dict]:
    """
    Merge several ranked result lists with Reciprocal Rank Fusion.

    A document's fused score is sum(1 / (k + rank)) over every list it
    appears in, with ranks starting at 1.

    Args:
        results_lists: List of ranked result lists; each item is a dict
            containing at least an "id" key.
        k: Smoothing constant; larger values reduce how much a single
            top rank dominates the fused score.

    Returns:
        One merged list, best fused score first, where each dict carries
        an extra "rrf_score" key.
    """
    fused_scores: dict = {}
    seen_docs: dict = {}
    for ranking in results_lists:
        for position, entry in enumerate(ranking, 1):
            key = entry["id"]
            fused_scores[key] = fused_scores.get(key, 0.0) + 1 / (k + position)
            # Later lists overwrite earlier copies of the same document
            seen_docs[key] = entry
    # Sort (id, score) pairs by fused score, highest first
    ranked = sorted(fused_scores.items(), key=lambda kv: kv[1], reverse=True)
    return [{**seen_docs[key], "rrf_score": score} for key, score in ranked]
# Usage
# NOTE(review): bm25_search / vector_search are assumed to be defined
# elsewhere; each must return dicts containing at least "id" and "score".
keyword_results = bm25_search(query)  # [{id, content, score}, ...]
vector_results = vector_search(query)  # [{id, content, score}, ...]
combined = reciprocal_rank_fusion([keyword_results, vector_results])
print(combined[:10])  # Top 10 combined results


def linear_combination(
    keyword_results: list[dict],
    vector_results: list[dict],
    alpha: float = 0.5
) -> list[dict]:
    """
    Combine keyword and vector result lists using weighted, min-max
    normalized scores.

    alpha = 0: pure keyword, alpha = 1: pure vector

    Args:
        keyword_results: dicts with "id" and "score" from keyword search.
        vector_results: dicts with "id" and "score" from vector search.
        alpha: weight of the vector score; keyword gets (1 - alpha).

    Returns:
        Every document from either list, each with an added
        "hybrid_score" key, sorted by that score descending.
    """
    # Normalize scores to the 0-1 range so BM25 magnitudes and vector
    # similarities are comparable before mixing
    def normalize(results):
        if not results:
            return {}
        scores = [r["score"] for r in results]
        min_s, max_s = min(scores), max(scores)
        range_s = max_s - min_s if max_s != min_s else 1
        return {
            r["id"]: (r["score"] - min_s) / range_s
            for r in results
        }

    keyword_norm = normalize(keyword_results)
    vector_norm = normalize(vector_results)

    # O(1) document lookup instead of a linear scan per id (the previous
    # next(...) over keyword_results + vector_results was O(n) per doc).
    # setdefault keeps the keyword copy when an id appears in both lists,
    # matching the old scan order.
    doc_map = {}
    for r in keyword_results + vector_results:
        doc_map.setdefault(r["id"], r)

    # Combine all document IDs from both result sets
    combined = []
    for doc_id in set(keyword_norm) | set(vector_norm):
        kw_score = keyword_norm.get(doc_id, 0)
        vec_score = vector_norm.get(doc_id, 0)
        # Missing from one list -> that component contributes 0
        final_score = (1 - alpha) * kw_score + alpha * vec_score
        combined.append({**doc_map[doc_id], "hybrid_score": final_score})
    return sorted(combined, key=lambda x: x["hybrid_score"], reverse=True)


import weaviate
from weaviate.classes.query import HybridFusion
# NOTE(review): relies on `import weaviate` earlier in the file and on a
# local Weaviate instance with a populated "Documents" collection — the
# snippet cannot run standalone.
client = weaviate.connect_to_local()
collection = client.collections.get("Documents")
# Hybrid search with alpha control
response = collection.query.hybrid(
    query="Python machine learning tutorial",
    alpha=0.5,  # 0 = pure BM25, 1 = pure vector
    limit=10,
    fusion_type=HybridFusion.RELATIVE_SCORE  # or RANKED (rank-based fusion)
)
# Print each hit's fused score plus a content preview
for obj in response.objects:
    print(f"Score: {obj.metadata.score}")
    print(f"Content: {obj.properties['content'][:100]}...")
# Autocut for smart result limiting
response = collection.query.hybrid(
    query="machine learning",
    alpha=0.7,
    auto_limit=3  # Automatically cut at score drops
)from qdrant_client import QdrantClient
from qdrant_client.models import SparseVector, SearchRequest, PrefetchQuery
# Qdrant uses sparse vectors for keyword search
# First, create collection with both dense and sparse vectors
# NOTE(review): VectorParams, Distance, SparseVectorParams, SparseIndexParams,
# PointStruct, FusionQuery and Fusion are used below but never imported here —
# presumably from qdrant_client.models; `client`, `dense_embedding`,
# `dense_query` and `sparse_query` are also assumed to exist. Verify before
# running.
client.create_collection(
    collection_name="hybrid_docs",
    vectors_config={
        "dense": VectorParams(size=1536, distance=Distance.COSINE)
    },
    sparse_vectors_config={
        "sparse": SparseVectorParams(index=SparseIndexParams())
    }
)
# Insert with both dense and sparse vectors
client.upsert(
    collection_name="hybrid_docs",
    points=[
        PointStruct(
            id=1,
            vector={
                "dense": dense_embedding,
                "sparse": SparseVector(
                    indices=[1, 5, 100, 354],  # Token IDs
                    values=[0.5, 0.3, 0.8, 0.2]  # Weights (TF-IDF/BM25)
                )
            },
            payload={"content": "..."}
        )
    ]
)
# Hybrid query using RRF
# Each prefetch over-fetches 20 candidates per modality; the fusion step
# then re-ranks the union and returns the top 10
results = client.query_points(
    collection_name="hybrid_docs",
    prefetch=[
        PrefetchQuery(query=dense_query, using="dense", limit=20),
        PrefetchQuery(query=sparse_query, using="sparse", limit=20)
    ],
    query=FusionQuery(fusion=Fusion.RRF),
    limit=10
)from pinecone import Pinecone
from pinecone_text.sparse import BM25Encoder

# Connect to the hybrid-capable index
pinecone_client = Pinecone(api_key="your-key")
hybrid_index = pinecone_client.Index("hybrid-index")

# Fit a BM25 encoder on the corpus so query-term weights match the data
bm25_encoder = BM25Encoder()
bm25_encoder.fit(documents)

# Encode the query twice: dense embedding + sparse BM25 term weights
query_text = "machine learning Python tutorial"
dense_query_vec = get_embedding(query_text)
sparse_query_vec = bm25_encoder.encode_queries([query_text])[0]

# One query call scores against both representations at once
results = hybrid_index.query(
    vector=dense_query_vec,
    sparse_vector={
        "indices": sparse_query_vec["indices"],
        "values": sparse_query_vec["values"]
    },
    top_k=10,
    include_metadata=True
)
from rank_bm25 import BM25Okapi
from openai import OpenAI
from qdrant_client import QdrantClient
import numpy as np
class HybridSearch:
    """Hybrid retriever combining BM25 keyword scores with vector search.

    Documents are indexed twice: into an in-memory BM25Okapi index for
    keyword scoring and into a Qdrant collection for embedding similarity.
    `search` runs both retrievers and merges the ranked lists with a
    min-max-normalized linear combination weighted by `alpha`.
    """
    def __init__(self, collection_name: str = "documents"):
        # NOTE(review): assumes a Qdrant server on localhost:6333 and that
        # `collection_name` already exists (nothing here calls
        # create_collection) — confirm against deployment setup.
        self.openai = OpenAI()
        self.qdrant = QdrantClient("localhost", port=6333)
        self.collection_name = collection_name
        self.documents = []  # docs in index order; list position == point id
        self.bm25 = None  # BM25Okapi index; built by index()
    def index(self, documents: list[dict]):
        """Index for both keyword and vector search.

        Args:
            documents: dicts each containing at least a "content" string;
                the full dict is stored as the Qdrant payload.
        """
        self.documents = documents
        texts = [d["content"] for d in documents]
        # Build BM25 index (naive lowercase whitespace tokenization)
        tokenized = [text.lower().split() for text in texts]
        self.bm25 = BM25Okapi(tokenized)
        # Build vector index
        embeddings = self._embed(texts)
        # NOTE(review): PointStruct is not imported at the top of this file —
        # presumably qdrant_client.models.PointStruct; verify imports.
        points = [
            PointStruct(id=i, vector=emb, payload=doc)
            for i, (doc, emb) in enumerate(zip(documents, embeddings))
        ]
        self.qdrant.upsert(collection_name=self.collection_name, points=points)
    def search(self, query: str, limit: int = 10, alpha: float = 0.5):
        """
        Hybrid search with configurable weighting.
        alpha = 0: pure BM25, alpha = 1: pure vector

        Returns up to `limit` dicts with "id", "content", "score" and
        "hybrid_score", best first.
        """
        # Keyword search — BM25 scores every indexed document
        tokenized_query = query.lower().split()
        bm25_scores = self.bm25.get_scores(tokenized_query)
        keyword_results = [
            {"id": i, "content": self.documents[i]["content"], "score": score}
            for i, score in enumerate(bm25_scores)
        ]
        # Over-fetch (limit * 2) from each retriever so the fused list still
        # has candidates that only one side ranked highly
        keyword_results = sorted(
            keyword_results, key=lambda x: x["score"], reverse=True
        )[:limit * 2]
        # Vector search
        query_embedding = self._embed([query])[0]
        vector_results = self.qdrant.search(
            collection_name=self.collection_name,
            query_vector=query_embedding,
            limit=limit * 2
        )
        vector_results = [
            {"id": hit.id, "content": hit.payload["content"], "score": hit.score}
            for hit in vector_results
        ]
        # Combine with linear combination
        combined = self._combine_results(
            keyword_results, vector_results, alpha
        )
        return combined[:limit]
    def _combine_results(self, kw_results, vec_results, alpha):
        """Combine using normalized linear weighting.

        Both score lists are min-max normalized to [0, 1] first so BM25
        magnitudes and similarity scores are comparable.
        """
        # Normalize
        def norm(results):
            if not results:
                return {}
            scores = [r["score"] for r in results]
            min_s, max_s = min(scores), max(scores)
            if max_s == min_s:
                # All scores tied: treat every doc as "average" rather than 0
                return {r["id"]: 0.5 for r in results}
            return {
                r["id"]: (r["score"] - min_s) / (max_s - min_s)
                for r in results
            }
        kw_norm = norm(kw_results)
        vec_norm = norm(vec_results)
        all_ids = set(kw_norm.keys()) | set(vec_norm.keys())
        # Later (vector) entries overwrite keyword entries for shared ids
        doc_map = {r["id"]: r for r in kw_results + vec_results}
        combined = []
        for doc_id in all_ids:
            # A doc missing from one list contributes 0 for that component
            kw_score = kw_norm.get(doc_id, 0)
            vec_score = vec_norm.get(doc_id, 0)
            final = (1 - alpha) * kw_score + alpha * vec_score
            combined.append({**doc_map[doc_id], "hybrid_score": final})
        return sorted(combined, key=lambda x: x["hybrid_score"], reverse=True)
    def _embed(self, texts):
        """Return one text-embedding-3-small vector per input text."""
        response = self.openai.embeddings.create(
            model="text-embedding-3-small",
            input=texts
        )
        return [item.embedding for item in response.data]The alpha parameter controls the balance:
# Query-adaptive alpha
def get_alpha(query: str) -> float:
    """Pick a hybrid-search alpha from simple query heuristics.

    Lower alpha favors keyword (BM25) matching; higher alpha favors
    semantic (vector) matching.
    """
    tokens = query.split()
    # All-caps tokens or hyphenated codes look like IDs / exact phrases
    exact_match_likely = any(t.isupper() or '-' in t for t in tokens)
    if exact_match_likely:
        return 0.3  # Favor keyword
    # Very short queries carry little context
    if len(tokens) <= 2:
        return 0.6  # Slightly favor semantic
    # Natural-language questions benefit from semantic understanding
    if query.strip().endswith('?'):
        return 0.7  # Favor semantic
    return 0.5  # Default balancedIn the next lesson, we'll cover deploying vector search applications to production.