Vector Databases & Embeddings

0 of 12 lessons completed

Hybrid Search: Combining Vector and Keyword

Hybrid search combines the precision of keyword search with the semantic understanding of vector search, delivering better results than either approach alone.

Why Hybrid Search?

Neither pure keyword nor pure semantic search is perfect:

| Search Type       | Strengths                        | Weaknesses                     |
|-------------------|----------------------------------|--------------------------------|
| Keyword (BM25)    | Exact matches, rare terms, IDs   | Misses synonyms, no context    |
| Semantic (Vector) | Synonyms, concepts, context      | May miss exact keywords, IDs   |
| Hybrid            | Best of both                     | More complexity                |

Example

Query: "Python error ERR-404"

  • Keyword: Finds docs with exact "ERR-404" ✅
  • Semantic: Finds docs about Python errors ✅
  • Hybrid: Finds docs about Python error ERR-404 specifically ✅✅

Hybrid Search Approaches

1. Score Fusion (Reciprocal Rank Fusion - RRF)

from collections import defaultdict

def reciprocal_rank_fusion(
    results_lists: list[list[dict]],
    k: int = 60
) -> list[dict]:
    """
    Merge several ranked result lists into one via Reciprocal Rank Fusion.

    Each document's fused score is sum(1 / (k + rank)) over every list
    in which it appears (ranks start at 1).

    Args:
        results_lists: Ranked lists of result dicts, each carrying an "id".
        k: Smoothing constant that keeps top ranks from dominating.

    Returns:
        All distinct documents, each augmented with "rrf_score", sorted
        by that score in descending order.
    """
    fused_scores: dict = {}
    doc_by_id: dict = {}

    for ranked_list in results_lists:
        for position, entry in enumerate(ranked_list, start=1):
            key = entry["id"]
            fused_scores[key] = fused_scores.get(key, 0.0) + 1.0 / (k + position)
            doc_by_id[key] = entry

    # Highest fused score first; sort is stable w.r.t. insertion order.
    ranked_pairs = sorted(
        fused_scores.items(), key=lambda pair: pair[1], reverse=True
    )
    return [{**doc_by_id[key], "rrf_score": score} for key, score in ranked_pairs]

# Usage — illustrative only: bm25_search, vector_search and query are
# assumed to be defined elsewhere; neither helper exists in this file.
keyword_results = bm25_search(query)  # [{id, content, score}, ...]
vector_results = vector_search(query)  # [{id, content, score}, ...]

# Fuse the two ranked lists; RRF needs no score normalization because it
# only looks at ranks, never at the raw scores.
combined = reciprocal_rank_fusion([keyword_results, vector_results])
print(combined[:10])  # Top 10 combined results

2. Linear Combination (Weighted Scoring)

def linear_combination(
    keyword_results: list[dict],
    vector_results: list[dict],
    alpha: float = 0.5
) -> list[dict]:
    """
    Combine keyword and vector results using weighted, min-max-normalized
    scores.

    alpha = 0: pure keyword, alpha = 1: pure vector.

    Args:
        keyword_results: BM25 results, each dict with "id" and "score".
        vector_results: Vector-search results, same shape.
        alpha: Weight of the vector score; keyword gets (1 - alpha).

    Returns:
        All documents from either list, each augmented with "hybrid_score",
        sorted by that score in descending order.
    """
    # Normalize scores to 0-1 range
    def normalize(results: list[dict]) -> dict:
        if not results:
            return {}
        scores = [r["score"] for r in results]
        min_s, max_s = min(scores), max(scores)
        range_s = max_s - min_s if max_s != min_s else 1
        return {
            r["id"]: (r["score"] - min_s) / range_s
            for r in results
        }

    keyword_norm = normalize(keyword_results)
    vector_norm = normalize(vector_results)

    # Build an id -> doc map once instead of scanning both lists per id
    # (the previous next(...) lookup made the whole combine O(n^2)).
    # Keyword docs take precedence, matching the original first-match order.
    doc_map = {r["id"]: r for r in vector_results}
    doc_map.update((r["id"], r) for r in keyword_results)

    # Combine all document IDs
    all_ids = set(keyword_norm.keys()) | set(vector_norm.keys())

    combined = []
    for doc_id in all_ids:
        kw_score = keyword_norm.get(doc_id, 0)
        vec_score = vector_norm.get(doc_id, 0)
        final_score = (1 - alpha) * kw_score + alpha * vec_score
        combined.append({**doc_map[doc_id], "hybrid_score": final_score})

    return sorted(combined, key=lambda x: x["hybrid_score"], reverse=True)

Built-in Hybrid Search

Weaviate Hybrid Search

import weaviate
from weaviate.classes.query import HybridFusion

# Connect to a locally running Weaviate instance and open the collection.
client = weaviate.connect_to_local()
collection = client.collections.get("Documents")

# Hybrid search with alpha control
response = collection.query.hybrid(
    query="Python machine learning tutorial",
    alpha=0.5,  # 0 = pure BM25, 1 = pure vector
    limit=10,
    fusion_type=HybridFusion.RELATIVE_SCORE  # or RANKED
)

# Each returned object carries the fused score in metadata; the stored
# fields live in `properties`.
for obj in response.objects:
    print(f"Score: {obj.metadata.score}")
    print(f"Content: {obj.properties['content'][:100]}...")

# Autocut for smart result limiting
response = collection.query.hybrid(
    query="machine learning",
    alpha=0.7,
    auto_limit=3  # Automatically cut at score drops
)

Qdrant Hybrid Search

from qdrant_client import QdrantClient
from qdrant_client.models import SparseVector, SearchRequest, PrefetchQuery

# NOTE(review): this snippet is not runnable as-is — VectorParams, Distance,
# SparseVectorParams, SparseIndexParams, PointStruct, FusionQuery and Fusion
# are used below but not imported (all live in qdrant_client.models), and
# `client`, `dense_embedding`, `dense_query`, `sparse_query` are never
# defined. Add the imports and a QdrantClient(...) before running.

# Qdrant uses sparse vectors for keyword search
# First, create collection with both dense and sparse vectors
client.create_collection(
    collection_name="hybrid_docs",
    vectors_config={
        "dense": VectorParams(size=1536, distance=Distance.COSINE)
    },
    sparse_vectors_config={
        "sparse": SparseVectorParams(index=SparseIndexParams())
    }
)

# Insert with both dense and sparse vectors
client.upsert(
    collection_name="hybrid_docs",
    points=[
        PointStruct(
            id=1,
            vector={
                "dense": dense_embedding,
                "sparse": SparseVector(
                    indices=[1, 5, 100, 354],  # Token IDs
                    values=[0.5, 0.3, 0.8, 0.2]  # Weights (TF-IDF/BM25)
                )
            },
            payload={"content": "..."}
        )
    ]
)

# Hybrid query using RRF: over-fetch 20 candidates from each index, then
# let the server fuse the two ranked lists.
results = client.query_points(
    collection_name="hybrid_docs",
    prefetch=[
        PrefetchQuery(query=dense_query, using="dense", limit=20),
        PrefetchQuery(query=sparse_query, using="sparse", limit=20)
    ],
    query=FusionQuery(fusion=Fusion.RRF),
    limit=10
)

Pinecone Hybrid Search

from pinecone import Pinecone
from pinecone_text.sparse import BM25Encoder

# Initialize
pc = Pinecone(api_key="your-key")
index = pc.Index("hybrid-index")

# Create BM25 encoder (train on your corpus)
# NOTE(review): `documents` and `get_embedding` are assumed to be defined
# elsewhere — neither exists in this snippet.
bm25 = BM25Encoder()
bm25.fit(documents)

# Create hybrid query
query = "machine learning Python tutorial"

dense_vec = get_embedding(query)
sparse_vec = bm25.encode_queries([query])[0]

# Search with both — Pinecone fuses the dense and sparse scores server-side.
results = index.query(
    vector=dense_vec,
    sparse_vector={
        "indices": sparse_vec["indices"],
        "values": sparse_vec["values"]
    },
    top_k=10,
    include_metadata=True
)

Custom Hybrid Implementation

import numpy as np
from openai import OpenAI
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct
from rank_bm25 import BM25Okapi

class HybridSearch:
    """Hybrid (BM25 + vector) search over an in-memory document list.

    Keyword scores come from a local BM25 index (rank_bm25); semantic
    scores come from a Qdrant collection of OpenAI embeddings. The two
    ranked lists are fused with a min-max-normalized linear combination.
    """

    def __init__(self, collection_name: str = "documents"):
        """
        Args:
            collection_name: Qdrant collection to upsert into and query.
                NOTE(review): the collection is assumed to already exist —
                index() never creates it; confirm setup elsewhere.
        """
        self.openai = OpenAI()
        self.qdrant = QdrantClient("localhost", port=6333)
        self.collection_name = collection_name
        self.documents: list[dict] = []  # docs as passed to index()
        self.bm25 = None  # BM25Okapi, built lazily by index()

    def index(self, documents: list[dict]) -> None:
        """Index documents (each needs a "content" key) for both searches."""
        self.documents = documents
        texts = [d["content"] for d in documents]

        # Build BM25 index (naive lowercase whitespace tokenization).
        tokenized = [text.lower().split() for text in texts]
        self.bm25 = BM25Okapi(tokenized)

        # Build vector index. Point ids mirror list positions so the BM25
        # and Qdrant result sets share one id space.
        embeddings = self._embed(texts)
        points = [
            PointStruct(id=i, vector=emb, payload=doc)
            for i, (doc, emb) in enumerate(zip(documents, embeddings))
        ]
        self.qdrant.upsert(collection_name=self.collection_name, points=points)

    def search(self, query: str, limit: int = 10, alpha: float = 0.5):
        """
        Hybrid search with configurable weighting.
        alpha = 0: pure BM25, alpha = 1: pure vector

        Args:
            query: Free-text query.
            limit: Number of fused results to return.
            alpha: Weight of the vector score; keyword gets (1 - alpha).

        Raises:
            RuntimeError: If index() has not been called yet.
        """
        if self.bm25 is None:
            # Clearer than the AttributeError that would otherwise surface.
            raise RuntimeError("Call index() before search()")

        # Keyword search — over-fetch 2x so fusion has enough candidates.
        tokenized_query = query.lower().split()
        bm25_scores = self.bm25.get_scores(tokenized_query)
        keyword_results = [
            {"id": i, "content": self.documents[i]["content"], "score": score}
            for i, score in enumerate(bm25_scores)
        ]
        keyword_results = sorted(
            keyword_results, key=lambda x: x["score"], reverse=True
        )[:limit * 2]

        # Vector search, same 2x over-fetch.
        query_embedding = self._embed([query])[0]
        vector_results = self.qdrant.search(
            collection_name=self.collection_name,
            query_vector=query_embedding,
            limit=limit * 2
        )
        vector_results = [
            {"id": hit.id, "content": hit.payload["content"], "score": hit.score}
            for hit in vector_results
        ]

        # Combine with linear combination
        combined = self._combine_results(
            keyword_results, vector_results, alpha
        )

        return combined[:limit]

    def _combine_results(self, kw_results, vec_results, alpha):
        """Fuse two result lists via normalized linear weighting.

        Each input dict needs "id" and "score"; output dicts gain
        "hybrid_score" and are sorted by it, descending.
        """
        def norm(results):
            # Min-max normalize to [0, 1]; a degenerate (constant) score
            # range maps every doc to the neutral value 0.5.
            if not results:
                return {}
            scores = [r["score"] for r in results]
            min_s, max_s = min(scores), max(scores)
            if max_s == min_s:
                return {r["id"]: 0.5 for r in results}
            return {
                r["id"]: (r["score"] - min_s) / (max_s - min_s)
                for r in results
            }

        kw_norm = norm(kw_results)
        vec_norm = norm(vec_results)

        all_ids = set(kw_norm.keys()) | set(vec_norm.keys())
        # Later entries win, so vector payloads shadow keyword ones.
        doc_map = {r["id"]: r for r in kw_results + vec_results}

        combined = []
        for doc_id in all_ids:
            kw_score = kw_norm.get(doc_id, 0)
            vec_score = vec_norm.get(doc_id, 0)
            final = (1 - alpha) * kw_score + alpha * vec_score
            combined.append({**doc_map[doc_id], "hybrid_score": final})

        return sorted(combined, key=lambda x: x["hybrid_score"], reverse=True)

    def _embed(self, texts):
        """Embed texts with OpenAI; returns one vector per input text."""
        response = self.openai.embeddings.create(
            model="text-embedding-3-small",
            input=texts
        )
        return [item.embedding for item in response.data]

Choosing Alpha

The alpha parameter controls the balance:

  • alpha = 0.0 - Pure keyword/BM25 search
  • alpha = 0.3 - Keyword-heavy (good for exact matches, IDs)
  • alpha = 0.5 - Balanced (good default)
  • alpha = 0.7 - Semantic-heavy (good for conceptual queries)
  • alpha = 1.0 - Pure semantic/vector search
# Query-adaptive alpha
def get_alpha(query: str) -> float:
    """Pick a hybrid-search alpha from simple query heuristics.

    Lower alpha leans keyword/BM25; higher alpha leans semantic/vector.
    """
    tokens = query.split()

    # All-caps tokens or hyphenated codes (e.g. "ERR-404") signal
    # exact-match intent, so favor keyword search.
    looks_like_code = any(t.isupper() or "-" in t for t in tokens)
    if looks_like_code:
        return 0.3

    # Very short queries carry little lexical signal; lean semantic.
    if len(tokens) <= 2:
        return 0.6

    # Natural-language questions benefit most from semantic matching;
    # everything else gets the balanced default.
    return 0.7 if query.strip().endswith("?") else 0.5

Best Practices

  • Use RRF for simplicity - No normalization needed
  • Use linear combination for fine-grained control
  • Tune alpha based on your use case and query types
  • Over-fetch from each source before combining
  • Evaluate both approaches separately to understand contributions

Key Takeaways

  • Hybrid search combines keyword and semantic for best results
  • RRF is simple and effective for score fusion
  • Alpha parameter controls the keyword vs semantic balance
  • Many vector databases have built-in hybrid search
  • Query-adaptive alpha can improve results further

In the next lesson, we'll cover deploying vector search applications to production.