Vector Databases & Embeddings

0 of 12 lessons completed

Hybrid Search: Combining Vector and Keyword

Hybrid search combines the precision of keyword search with the semantic understanding of vector search, delivering better results than either approach alone.

Why Hybrid Search?

Neither pure keyword nor pure semantic search is perfect:

| Search Type       | Strengths                        | Weaknesses                     |
|-------------------|----------------------------------|--------------------------------|
| Keyword (BM25)    | Exact matches, rare terms, IDs   | Misses synonyms, no context    |
| Semantic (Vector) | Synonyms, concepts, context      | May miss exact keywords, IDs   |
| Hybrid            | Best of both                     | More complexity                |

Example

Query: "Python error ERR-404"

  • Keyword: Finds docs with exact "ERR-404" ✅
  • Semantic: Finds docs about Python errors ✅
  • Hybrid: Finds docs about Python error ERR-404 specifically ✅✅

Hybrid Search Approaches

1. Score Fusion (Reciprocal Rank Fusion - RRF)

from collections import defaultdict

def reciprocal_rank_fusion(
    results_lists: list[list[dict]],
    k: int = 60
) -> list[dict]:
    """
    Merge several ranked result lists into one via Reciprocal Rank Fusion.

    Each document's fused score is sum(1 / (k + rank)) over every list
    in which it appears (ranks start at 1).

    Args:
        results_lists: Ranked lists of result dicts, each carrying an "id".
        k: Smoothing constant that keeps top ranks from dominating.

    Returns:
        All distinct documents, each augmented with "rrf_score", sorted
        by that score in descending order.
    """
    fused_scores: dict = {}
    doc_by_id: dict = {}

    for ranked_list in results_lists:
        for position, entry in enumerate(ranked_list, start=1):
            key = entry["id"]
            fused_scores[key] = fused_scores.get(key, 0.0) + 1.0 / (k + position)
            doc_by_id[key] = entry

    # Highest fused score first; sort is stable w.r.t. insertion order.
    ranked_pairs = sorted(
        fused_scores.items(), key=lambda pair: pair[1], reverse=True
    )
    return [{**doc_by_id[key], "rrf_score": score} for key, score in ranked_pairs]

# Usage — illustrative only: bm25_search, vector_search and query are
# assumed to be defined elsewhere; neither helper exists in this file.
keyword_results = bm25_search(query)  # [{id, content, score}, ...]
vector_results = vector_search(query)  # [{id, content, score}, ...]

# Fuse the two ranked lists; RRF needs no score normalization because it
# only looks at ranks, never at the raw scores.
combined = reciprocal_rank_fusion([keyword_results, vector_results])
print(combined[:10])  # Top 10 combined results

2. Linear Combination (Weighted Scoring)

def linear_combination(
    keyword_results: list[dict],
    vector_results: list[dict],
    alpha: float = 0.5
) -> list[dict]:
    """
    Combine keyword and vector results using weighted, min-max-normalized
    scores.

    alpha = 0: pure keyword, alpha = 1: pure vector.

    Args:
        keyword_results: BM25 results, each dict with "id" and "score".
        vector_results: Vector-search results, same shape.
        alpha: Weight of the vector score; keyword gets (1 - alpha).

    Returns:
        All documents from either list, each augmented with "hybrid_score",
        sorted by that score in descending order.
    """
    # Normalize scores to 0-1 range
    def normalize(results: list[dict]) -> dict:
        if not results:
            return {}
        scores = [r["score"] for r in results]
        min_s, max_s = min(scores), max(scores)
        range_s = max_s - min_s if max_s != min_s else 1
        return {
            r["id"]: (r["score"] - min_s) / range_s
            for r in results
        }

    keyword_norm = normalize(keyword_results)
    vector_norm = normalize(vector_results)

    # Build an id -> doc map once instead of scanning both lists per id
    # (the previous next(...) lookup made the whole combine O(n^2)).
    # Keyword docs take precedence, matching the original first-match order.
    doc_map = {r["id"]: r for r in vector_results}
    doc_map.update((r["id"], r) for r in keyword_results)

    # Combine all document IDs
    all_ids = set(keyword_norm.keys()) | set(vector_norm.keys())

    combined = []
    for doc_id in all_ids:
        kw_score = keyword_norm.get(doc_id, 0)
        vec_score = vector_norm.get(doc_id, 0)
        final_score = (1 - alpha) * kw_score + alpha * vec_score
        combined.append({**doc_map[doc_id], "hybrid_score": final_score})

    return sorted(combined, key=lambda x: x["hybrid_score"], reverse=True)

Built-in Hybrid Search

Weaviate Hybrid Search

import weaviate
from weaviate.classes.query import HybridFusion

# Connect to a locally running Weaviate instance and open the collection.
client = weaviate.connect_to_local()
collection = client.collections.get("Documents")

# Hybrid search with alpha control
response = collection.query.hybrid(
    query="Python machine learning tutorial",
    alpha=0.5,  # 0 = pure BM25, 1 = pure vector
    limit=10,
    fusion_type=HybridFusion.RELATIVE_SCORE  # or RANKED
)

# Each returned object carries the fused score in metadata; the stored
# fields live in `properties`.
for obj in response.objects:
    print(f"Score: {obj.metadata.score}")
    print(f"Content: {obj.properties['content'][:100]}...")

# Autocut for smart result limiting
response = collection.query.hybrid(
    query="machine learning",
    alpha=0.7,
    auto_limit=3  # Automatically cut at score drops
)

Qdrant Hybrid Search

from qdrant_client import QdrantClient
from qdrant_client.models import SparseVector, SearchRequest, PrefetchQuery

# NOTE(review): this snippet is not runnable as-is — VectorParams, Distance,
# SparseVectorParams, SparseIndexParams, PointStruct, FusionQuery and Fusion
# are used below but not imported (all live in qdrant_client.models), and
# `client`, `dense_embedding`, `dense_query`, `sparse_query` are never
# defined. Add the imports and a QdrantClient(...) before running.

# Qdrant uses sparse vectors for keyword search
# First, create collection with both dense and sparse vectors
client.create_collection(
    collection_name="hybrid_docs",
    vectors_config={
        "dense": VectorParams(size=1536, distance=Distance.COSINE)
    },
    sparse_vectors_config={
        "sparse": SparseVectorParams(index=SparseIndexParams())
    }
)

# Insert with both dense and sparse vectors
client.upsert(
    collection_name="hybrid_docs",
    points=[
        PointStruct(
            id=1,
            vector={
                "dense": dense_embedding,
                "sparse": SparseVector(
                    indices=[1, 5, 100, 354],  # Token IDs
                    values=[0.5, 0.3, 0.8, 0.2]  # Weights (TF-IDF/BM25)
                )
            },
            payload={"content": "..."}
        )
    ]
)

# Hybrid query using RRF: over-fetch 20 candidates from each index, then
# let the server fuse the two ranked lists.
results = client.query_points(
    collection_name="hybrid_docs",
    prefetch=[
        PrefetchQuery(query=dense_query, using="dense", limit=20),
        PrefetchQuery(query=sparse_query, using="sparse", limit=20)
    ],
    query=FusionQuery(fusion=Fusion.RRF),
    limit=10
)

Pinecone Hybrid Search

from pinecone import Pinecone
from pinecone_text.sparse import BM25Encoder

# Initialize
pc = Pinecone(api_key="your-key")
index = pc.Index("hybrid-index")

# Create BM25 encoder (train on your corpus)
# NOTE(review): `documents` and `get_embedding` are assumed to be defined
# elsewhere — neither exists in this snippet.
bm25 = BM25Encoder()
bm25.fit(documents)

# Create hybrid query
query = "machine learning Python tutorial"

dense_vec = get_embedding(query)
sparse_vec = bm25.encode_queries([query])[0]

# Search with both — Pinecone fuses the dense and sparse scores server-side.
results = index.query(
    vector=dense_vec,
    sparse_vector={
        "indices": sparse_vec["indices"],
        "values": sparse_vec["values"]
    },
    top_k=10,
    include_metadata=True
)

Custom Hybrid Implementation

import numpy as np
from openai import OpenAI
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct
from rank_bm25 import BM25Okapi

class HybridSearch:
    """Hybrid (BM25 + vector) search over an in-memory document list.

    Keyword scores come from a local BM25 index (rank_bm25); semantic
    scores come from a Qdrant collection of OpenAI embeddings. The two
    ranked lists are fused with a min-max-normalized linear combination.
    """

    def __init__(self, collection_name: str = "documents"):
        """
        Args:
            collection_name: Qdrant collection to upsert into and query.
                NOTE(review): the collection is assumed to already exist —
                index() never creates it; confirm setup elsewhere.
        """
        self.openai = OpenAI()
        self.qdrant = QdrantClient("localhost", port=6333)
        self.collection_name = collection_name
        self.documents: list[dict] = []  # docs as passed to index()
        self.bm25 = None  # BM25Okapi, built lazily by index()

    def index(self, documents: list[dict]) -> None:
        """Index documents (each needs a "content" key) for both searches."""
        self.documents = documents
        texts = [d["content"] for d in documents]

        # Build BM25 index (naive lowercase whitespace tokenization).
        tokenized = [text.lower().split() for text in texts]
        self.bm25 = BM25Okapi(tokenized)

        # Build vector index. Point ids mirror list positions so the BM25
        # and Qdrant result sets share one id space.
        embeddings = self._embed(texts)
        points = [
            PointStruct(id=i, vector=emb, payload=doc)
            for i, (doc, emb) in enumerate(zip(documents, embeddings))
        ]
        self.qdrant.upsert(collection_name=self.collection_name, points=points)

    def search(self, query: str, limit: int = 10, alpha: float = 0.5):
        """
        Hybrid search with configurable weighting.
        alpha = 0: pure BM25, alpha = 1: pure vector

        Args:
            query: Free-text query.
            limit: Number of fused results to return.
            alpha: Weight of the vector score; keyword gets (1 - alpha).

        Raises:
            RuntimeError: If index() has not been called yet.
        """
        if self.bm25 is None:
            # Clearer than the AttributeError that would otherwise surface.
            raise RuntimeError("Call index() before search()")

        # Keyword search — over-fetch 2x so fusion has enough candidates.
        tokenized_query = query.lower().split()
        bm25_scores = self.bm25.get_scores(tokenized_query)
        keyword_results = [
            {"id": i, "content": self.documents[i]["content"], "score": score}
            for i, score in enumerate(bm25_scores)
        ]
        keyword_results = sorted(
            keyword_results, key=lambda x: x["score"], reverse=True
        )[:limit * 2]

        # Vector search, same 2x over-fetch.
        query_embedding = self._embed([query])[0]
        vector_results = self.qdrant.search(
            collection_name=self.collection_name,
            query_vector=query_embedding,
            limit=limit * 2
        )
        vector_results = [
            {"id": hit.id, "content": hit.payload["content"], "score": hit.score}
            for hit in vector_results
        ]

        # Combine with linear combination
        combined = self._combine_results(
            keyword_results, vector_results, alpha
        )

        return combined[:limit]

    def _combine_results(self, kw_results, vec_results, alpha):
        """Fuse two result lists via normalized linear weighting.

        Each input dict needs "id" and "score"; output dicts gain
        "hybrid_score" and are sorted by it, descending.
        """
        def norm(results):
            # Min-max normalize to [0, 1]; a degenerate (constant) score
            # range maps every doc to the neutral value 0.5.
            if not results:
                return {}
            scores = [r["score"] for r in results]
            min_s, max_s = min(scores), max(scores)
            if max_s == min_s:
                return {r["id"]: 0.5 for r in results}
            return {
                r["id"]: (r["score"] - min_s) / (max_s - min_s)
                for r in results
            }

        kw_norm = norm(kw_results)
        vec_norm = norm(vec_results)

        all_ids = set(kw_norm.keys()) | set(vec_norm.keys())
        # Later entries win, so vector payloads shadow keyword ones.
        doc_map = {r["id"]: r for r in kw_results + vec_results}

        combined = []
        for doc_id in all_ids:
            kw_score = kw_norm.get(doc_id, 0)
            vec_score = vec_norm.get(doc_id, 0)
            final = (1 - alpha) * kw_score + alpha * vec_score
            combined.append({**doc_map[doc_id], "hybrid_score": final})

        return sorted(combined, key=lambda x: x["hybrid_score"], reverse=True)

    def _embed(self, texts):
        """Embed texts with OpenAI; returns one vector per input text."""
        response = self.openai.embeddings.create(
            model="text-embedding-3-small",
            input=texts
        )
        return [item.embedding for item in response.data]

Choosing Alpha

The alpha parameter controls the balance:

  • alpha = 0.0 - Pure keyword/BM25 search
  • alpha = 0.3 - Keyword-heavy (good for exact matches, IDs)
  • alpha = 0.5 - Balanced (good default)
  • alpha = 0.7 - Semantic-heavy (good for conceptual queries)
  • alpha = 1.0 - Pure semantic/vector search
# Query-adaptive alpha
def get_alpha(query: str) -> float:
    """Pick a hybrid-search alpha from simple query heuristics.

    Lower alpha leans keyword/BM25; higher alpha leans semantic/vector.
    """
    tokens = query.split()

    # All-caps tokens or hyphenated codes (e.g. "ERR-404") signal
    # exact-match intent, so favor keyword search.
    looks_like_code = any(t.isupper() or "-" in t for t in tokens)
    if looks_like_code:
        return 0.3

    # Very short queries carry little lexical signal; lean semantic.
    if len(tokens) <= 2:
        return 0.6

    # Natural-language questions benefit most from semantic matching;
    # everything else gets the balanced default.
    return 0.7 if query.strip().endswith("?") else 0.5

Best Practices

  • Use RRF for simplicity - No normalization needed
  • Use linear combination for fine-grained control
  • Tune alpha based on your use case and query types
  • Over-fetch from each source before combining
  • Evaluate both approaches separately to understand contributions

Key Takeaways

  • Hybrid search combines keyword and semantic for best results
  • RRF is simple and effective for score fusion
  • Alpha parameter controls the keyword vs semantic balance
  • Many vector databases have built-in hybrid search
  • Query-adaptive alpha can improve results further

In the next lesson, we'll cover deploying vector search applications to production.