Semantic search finds results based on meaning rather than exact keyword matches. This lesson shows you how to build end-to-end semantic search systems using vector databases.
| Aspect | Lexical (Keyword) | Semantic |
|---|---|---|
| Matching | Exact keywords | Meaning/context |
| Query: "car" | Only docs with "car" | Also: vehicle, automobile, sedan |
| Synonyms | ❌ Misses them | ✅ Understands them |
| Context | ❌ No understanding | ✅ Considers context |
| Speed | Very fast | Fast (with ANN) |
# Complete semantic search system
import hashlib
import uuid

from openai import OpenAI
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
class SemanticSearch:
def __init__(self, collection_name: str = "documents"):
self.openai = OpenAI()
self.qdrant = QdrantClient("localhost", port=6333)
self.collection_name = collection_name
self.model = "text-embedding-3-small"
self.dimension = 1536
self._ensure_collection()
def _ensure_collection(self):
"""Create collection if it doesn't exist"""
collections = self.qdrant.get_collections().collections
if not any(c.name == self.collection_name for c in collections):
self.qdrant.create_collection(
collection_name=self.collection_name,
vectors_config=VectorParams(
size=self.dimension,
distance=Distance.COSINE
)
)
def _embed(self, texts: list[str]) -> list[list[float]]:
"""Generate embeddings for texts"""
response = self.openai.embeddings.create(
model=self.model,
input=texts
)
return [item.embedding for item in response.data]
def _generate_id(self, text: str) -> str:
"""Generate deterministic ID from text"""
return hashlib.md5(text.encode()).hexdigest()[:16]
def index(self, documents: list[dict]):
"""Index documents with metadata
Args:
documents: List of {"content": str, "metadata": dict}
"""
texts = [doc["content"] for doc in documents]
embeddings = self._embed(texts)
points = [
PointStruct(
id=self._generate_id(doc["content"]),
vector=embedding,
payload={
"content": doc["content"],
**doc.get("metadata", {})
}
)
for doc, embedding in zip(documents, embeddings)
]
self.qdrant.upsert(
collection_name=self.collection_name,
points=points
)
return len(points)
def search(self, query: str, limit: int = 5, filter_dict: dict = None):
"""Semantic search
Args:
query: Search query
limit: Number of results
filter_dict: Optional filter conditions
"""
query_embedding = self._embed([query])[0]
# Build filter if provided
query_filter = None
if filter_dict:
from qdrant_client.models import Filter, FieldCondition, MatchValue
conditions = [
FieldCondition(key=k, match=MatchValue(value=v))
for k, v in filter_dict.items()
]
query_filter = Filter(must=conditions)
results = self.qdrant.search(
collection_name=self.collection_name,
query_vector=query_embedding,
query_filter=query_filter,
limit=limit
)
return [
{
"content": hit.payload["content"],
"score": hit.score,
"metadata": {k: v for k, v in hit.payload.items() if k != "content"}
}
for hit in results
]# Initialize
search = SemanticSearch("product_docs")
# Index documents
documents = [
    {
        "content": "The MacBook Pro features the M3 chip with 8-core CPU...",
        "metadata": {"category": "laptops", "brand": "Apple"}
    },
    {
        "content": "Dell XPS 15 comes with Intel Core i9 processor...",
        "metadata": {"category": "laptops", "brand": "Dell"}
    },
    {
        "content": "iPhone 15 Pro has a titanium design...",
        "metadata": {"category": "phones", "brand": "Apple"}
    }
]
search.index(documents)
# Search
results = search.search("powerful laptop for development")
# Returns MacBook Pro and Dell XPS even without exact keywords
# Search with filter
results = search.search(
    "best computer for coding",
    filter_dict={"brand": "Apple"}
)
# Only Apple products

# The typing import below was fused onto the comment above in the original,
# which silently disabled it and left `Generator` undefined for
# DocumentProcessor.process_documents' return annotation.
from typing import Generator
import tiktoken
class DocumentProcessor:
def __init__(self, chunk_size: int = 512, chunk_overlap: int = 50):
self.chunk_size = chunk_size
self.chunk_overlap = chunk_overlap
self.tokenizer = tiktoken.get_encoding("cl100k_base")
def chunk_document(self, text: str, source: str = None) -> list[dict]:
"""Split document into overlapping chunks"""
tokens = self.tokenizer.encode(text)
chunks = []
start = 0
while start < len(tokens):
end = start + self.chunk_size
chunk_tokens = tokens[start:end]
chunk_text = self.tokenizer.decode(chunk_tokens)
chunks.append({
"content": chunk_text,
"metadata": {
"source": source,
"chunk_index": len(chunks),
"token_count": len(chunk_tokens)
}
})
start = end - self.chunk_overlap
return chunks
def process_documents(self, documents: list[dict]) -> Generator[dict, None, None]:
"""Process multiple documents"""
for doc in documents:
chunks = self.chunk_document(
doc["content"],
source=doc.get("source", "unknown")
)
for chunk in chunks:
# Merge original metadata
chunk["metadata"].update(doc.get("metadata", {}))
yield chunk
# Usage
processor = DocumentProcessor(chunk_size=512, chunk_overlap=50)
# Process large documents
raw_docs = [
    {"content": "Very long document text...", "source": "manual.pdf"},
    {"content": "Another long document...", "source": "guide.pdf"}
]
chunks = list(processor.process_documents(raw_docs))
search.index(chunks)

# The FastAPI import below was fused onto the line above in the original,
# which made the whole section a syntax error.
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# FastAPI application exposing the semantic-search pipeline over HTTP.
app = FastAPI()
# Module-level engine instance shared by all request handlers.
search = SemanticSearch("documents")
class SearchRequest(BaseModel):
    """Request body for POST /search."""
    query: str  # natural-language search query
    limit: int = 5  # maximum number of results to return
    # Optional exact-match payload filters, e.g. {"brand": "Apple"}.
    # Declared `dict | None` (not `dict = None`) so the schema is honest
    # about the field being optional and clients may send an explicit null.
    filters: dict | None = None
class IndexRequest(BaseModel):
    """Request body for POST /index.

    Each document is expected as {"content": str, "metadata": dict} —
    the shape consumed by SemanticSearch.index.
    """
    documents: list[dict]
@app.post("/index")
async def index_documents(request: IndexRequest):
    """Embed and index the submitted documents; reports how many were stored."""
    indexed_count = search.index(request.documents)
    return {"indexed": indexed_count}
@app.post("/search")
async def search_documents(request: SearchRequest):
    """Run a semantic search and return the scored hits."""
    hits = search.search(
        query=request.query,
        limit=request.limit,
        filter_dict=request.filters,
    )
    return {"results": hits}
# Run: uvicorn app:app --reload

// components/SemanticSearch.tsx
'use client';
import { useState } from 'react';
// Shape of one hit returned by POST /api/search (mirrors the backend's
// {"content", "score", "metadata"} result dicts).
interface SearchResult {
  content: string;
  score: number;
  metadata: Record<string, string>;
}
// Search box + result list backed by the POST /api/search endpoint.
export function SemanticSearch() {
  const [query, setQuery] = useState('');
  const [results, setResults] = useState<SearchResult[]>([]);
  const [loading, setLoading] = useState(false);

  const handleSearch = async () => {
    setLoading(true);
    try {
      const res = await fetch('/api/search', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ query, limit: 5 })
      });
      // Guard against HTTP errors: without this, a non-OK response would
      // set `results` to undefined and crash results.map below.
      if (!res.ok) {
        setResults([]);
        return;
      }
      const data = await res.json();
      setResults(data.results ?? []);
    } catch {
      // Network failure: clear stale results instead of surfacing an
      // unhandled promise rejection from the click handler.
      setResults([]);
    } finally {
      setLoading(false);
    }
  };

  return (
    <div>
      <input
        value={query}
        onChange={(e) => setQuery(e.target.value)}
        placeholder="Search semantically..."
        onKeyDown={(e) => e.key === 'Enter' && handleSearch()}
      />
      <button onClick={handleSearch} disabled={loading}>
        {loading ? 'Searching...' : 'Search'}
      </button>
      <div className="results">
        {results.map((result, i) => (
          <div key={i} className="result">
            <div className="score">
              {(result.score * 100).toFixed(1)}% match
            </div>
            <p>{result.content}</p>
          </div>
        ))}
      </div>
    </div>
  );
}
# 1. Batch embedding requests
def batch_embed(texts: list[str], batch_size: int = 100):
    """Embed `texts` in slices of at most `batch_size` items to stay under rate limits."""
    collected: list = []
    for offset in range(0, len(texts), batch_size):
        collected.extend(embed(texts[offset:offset + batch_size]))
    return collected
# 2. Use caching for frequent queries
from functools import lru_cache
import hashlib
@lru_cache(maxsize=1000)
def cached_embed(text: str) -> tuple[float, ...]:
    """Embed a single text with memoization; returns a tuple so the cached value is hashable."""
    (vector,) = embed([text])
    return tuple(vector)
# 3. Async for concurrent operations
import asyncio
from openai import AsyncOpenAI
async def async_embed(texts: list[str]) -> list[list[float]]:
    """Asynchronously embed a batch of texts with text-embedding-3-small."""
    client = AsyncOpenAI()
    response = await client.embeddings.create(
        model="text-embedding-3-small",
        input=texts,
    )
    return [entry.embedding for entry in response.data]
# 4. Use quantization for large scale
from qdrant_client.models import ScalarQuantization, ScalarQuantizationConfig
# Reduces memory by 4x
# NOTE(review): `client` is assumed to be a QdrantClient instance — it is
# not defined in this snippet; confirm against the surrounding lesson code.
client.create_collection(
    collection_name="large_collection",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
    # int8 scalar quantization stores each vector component in 1 byte
    # instead of 4 (float32) — the ~4x memory reduction noted above.
    quantization_config=ScalarQuantization(
        scalar=ScalarQuantizationConfig(type="int8")
    )
)

In the next lesson, we'll explore hybrid search, combining semantic and lexical approaches for even better results.