322 字
1 分钟
RAG 优化策略深度解析
一、RAG 优化概述
1.1 为什么需要 RAG 优化
graph TB
subgraph "基础 RAG 问题"
A["检索质量差"]
B["Context 冗余"]
C["引用不准确"]
D["幻觉依然存在"]
end
subgraph "优化方向"
E["Query 改写"]
F["混合检索"]
G["智能重排"]
H["Context 压缩"]
end
A --> E
B --> F
C --> G
D --> H
| 问题类型 | 典型表现 | 优化策略 |
|---|---|---|
| 检索不到 | 关键词不匹配语义 | Query 改写/扩展 |
| 检索太多 | 返回大量无关 Context | 混合检索 + 重排序 |
| Context 过长 | 超过模型上下文限制 | Context 压缩/摘要 |
| 引用不准 | 答案与引用不匹配 | 引用追踪与验证 |
1.2 优化架构总览
flowchart LR
A["用户 Query"] --> B["Query 改写"]
B --> C["混合检索"]
C --> D["粗排召回"]
D --> E["精排重排序"]
E --> F["Context 组装"]
F --> G["LLM 生成"]
G --> H["引用验证"]
H --> I["最终输出"]
二、Query 改写与扩展
2.1 Query 改写策略
class QueryRewriter:
    """Rewrite a user query into several variants with the help of an LLM.

    The ``llm`` object must provide ``batch_generate(prompts)`` and return
    one generated string per prompt.
    """

    def __init__(self, llm):
        self.llm = llm

    def rewrite(self, query: str) -> str:
        """Return the query merged with its LLM-generated rewrites.

        Two prompts are sent in a single batch: one asks the model to expand
        abbreviations and implicit concepts, the other asks for synonymous
        phrasings of the query.

        Args:
            query: The original user query.

        Returns:
            The original query followed by every distinct rewrite, joined
            with ``" | "``.
        """
        prompts = [
            # Expand abbreviations and implied concepts.
            f"将以下查询展开为完整表述:{query}",
            # Produce synonymous phrasings.
            f"提供查询的同义词表达:{query}",
        ]
        expanded = self.llm.batch_generate(prompts)
        return self._merge_rewrite(query, expanded)

    def _merge_rewrite(self, original: str, rewrites: list) -> str:
        """Join the original query and its rewrites without duplicates.

        ``dict.fromkeys`` is used instead of ``set`` so that the result is
        deterministic: the original query always comes first and rewrites
        keep the order in which the LLM produced them. Rewrites are stripped
        of surrounding whitespace and blank ones are dropped.

        Args:
            original: The original user query.
            rewrites: Strings produced by the LLM.

        Returns:
            The distinct variants joined with ``" | "``.
        """
        candidates = [original, *(text.strip() for text in rewrites)]
        variants = dict.fromkeys(candidates)
        return " | ".join(variant for variant in variants if variant)

# 2.2 HyDE 假设文档
# HyDE (Hypothetical Document Embeddings)
class HyDERetriever:
    """Retrieve with an LLM-written hypothetical answer plus the raw query.

    ``llm`` must provide ``generate(prompt)``; ``vector_store`` must provide
    ``similarity_search(text, top_k)`` returning ranked objects that expose
    an ``id`` attribute.
    """

    def __init__(self, llm, vector_store):
        self.llm = llm
        self.vector_store = vector_store

    def retrieve(self, query: str, top_k: int = 5):
        """Run HyDE retrieval and fuse it with plain query retrieval.

        Steps:
            1. Ask the LLM for a hypothetical answer to the query.
            2. Search the vector store with that hypothetical answer.
            3. Search the vector store with the original query as well.
            4. Fuse both rankings with Reciprocal Rank Fusion.

        Args:
            query: The user query.
            top_k: Number of results requested from each search and kept
                after fusion.

        Returns:
            A list of ``(doc_id, fused_score)`` tuples, best first, with at
            most ``top_k`` entries.
        """
        # 1. Generate the hypothetical answer.
        hypothetical_doc = self.llm.generate(
            f"假设你是专家,请给出以下问题的详细答案:\n{query}"
        )

        # 2. Retrieve with the hypothetical answer.
        results = self.vector_store.similarity_search(hypothetical_doc, top_k)

        # 3. Retrieve with the original query too.
        original_results = self.vector_store.similarity_search(query, top_k)

        # 4. Fuse both rankings; top_k is passed explicitly because the
        #    helper has no other way to know the requested cut-off.
        return self._fusion_results(results, original_results, top_k=top_k)

    def _fusion_results(self, hyde_results, original_results, k=60, top_k=None):
        """Fuse two rankings with Reciprocal Rank Fusion (RRF).

        Each document receives ``1 / (k + rank)`` from every list it appears
        in, where ``rank`` starts at 1.

        Args:
            hyde_results: Ranking obtained from the hypothetical answer.
            original_results: Ranking obtained from the original query.
            k: RRF smoothing constant.
            top_k: Maximum number of fused entries to return; ``None``
                returns all of them.

        Returns:
            A list of ``(doc_id, fused_score)`` tuples sorted by descending
            score.
        """
        scores = {}
        for ranking in (hyde_results, original_results):
            for position, doc in enumerate(ranking):
                scores[doc.id] = scores.get(doc.id, 0) + 1 / (k + position + 1)

        ranked = sorted(scores.items(), key=lambda item: -item[1])
        return ranked[:top_k]

# 2.3 Query 扩展技术
class QueryExpander:
    """Expand a query into related variants (synonyms, hyponyms, hypernyms).

    The hooks ``_extract_core_terms``, ``_get_hyponyms`` and
    ``_get_hypernyms`` are called but not defined in this class; they must
    be supplied by a subclass or mixin.
    """

    def __init__(self, embedder=None, vector_index=None):
        """Store the collaborators used by ``_get_synonyms``.

        Args:
            embedder: Object providing ``encode(list_of_terms)``.
            vector_index: Object providing ``search(vector, top_k=...)``.
        """
        self.embedder = embedder
        self.vector_index = vector_index

    def expand(self, query: str) -> list[str]:
        """Return distinct expansions of ``query`` from several angles.

        Args:
            query: The original user query.

        Returns:
            Expanded queries without duplicates, in the order they were
            generated: synonym substitutions, then hyponym additions, then
            hypernym substitutions.
        """
        expansions = []

        # 1. Core-term extraction + synonym substitution.
        core_terms = self._extract_core_terms(query)
        for term in core_terms:
            for synonym in self._get_synonyms(term):
                expansions.append(query.replace(term, synonym))

        # 2. Hyponym expansion (more specific): append to the query.
        for hyponym in self._get_hyponyms(core_terms):
            expansions.append(f"{query} {hyponym}")

        # 3. Hypernym expansion (more general): substitute the first core
        #    term. Skipped when there is no core term to substitute, which
        #    would otherwise raise IndexError.
        if core_terms:
            first_term = core_terms[0]
            for hypernym in self._get_hypernyms(core_terms):
                expansions.append(query.replace(first_term, hypernym))

        # Order-preserving dedup keeps the output deterministic.
        return list(dict.fromkeys(expansions))

    def _get_synonyms(self, term: str) -> list:
        """Look up terms similar to ``term`` via the vector index.

        Args:
            term: The term to find neighbours for.

        Returns:
            The items returned by ``vector_index.search`` (5 requested),
            excluding any that equal ``term`` itself.
        """
        term_vec = self.embedder.encode([term])
        similar = self.vector_index.search(term_vec, top_k=5)
        return [candidate for candidate in similar if candidate != term]

# 三、混合检索策略
3.1 混合检索架构
graph TB
A["Query"] --> B["向量检索"]
A --> C["BM25 检索"]
A --> D["稀疏检索"]
A --> E["密集检索"]
B --> F["向量结果集"]
C --> G["BM25 结果集"]
D --> H["稀疏结果集"]
E --> I["密集结果集"]
F --> J["结果融合"]
G --> J
H --> J
I --> J
J --> K["RRF 融合"]
K --> L["Top-K 召回"]
3.2 BM25 + 向量混合
class HybridRetriever:
    """Combine vector search and BM25 search with Reciprocal Rank Fusion.

    ``vector_store.search(vector, n)`` and ``bm25_index.search(query, n)``
    must return ranked objects exposing an ``id`` attribute.
    """

    def __init__(self, vector_store, bm25_index, embed_fn=None):
        """Store the two indexes and the query-embedding callable.

        Args:
            vector_store: Dense index searched with the embedded query.
            bm25_index: Lexical index searched with the raw query string.
            embed_fn: Callable mapping a query string to its vector. May be
                omitted when a subclass overrides ``embed``.
        """
        self.vector_store = vector_store
        self.bm25_index = bm25_index
        self.embed_fn = embed_fn

    def embed(self, query: str):
        """Return the vector for ``query`` using the configured callable.

        Raises:
            NotImplementedError: If no ``embed_fn`` was supplied and the
                method has not been overridden.
        """
        if self.embed_fn is None:
            raise NotImplementedError(
                "HybridRetriever needs an embed_fn or an overridden embed()"
            )
        return self.embed_fn(query)

    def retrieve(self, query: str, top_k: int = 10):
        """Return the fused top-k results for ``query``.

        Each index is asked for ``top_k * 2`` candidates so that fusion has
        a wider pool than the final cut-off.

        Args:
            query: The user query.
            top_k: Number of fused entries to return.

        Returns:
            A list of ``(doc_id, fused_score)`` tuples, best first.
        """
        candidate_count = top_k * 2

        # 1. Vector search.
        vector_results = self.vector_store.search(
            self.embed(query), candidate_count
        )

        # 2. BM25 search.
        bm25_results = self.bm25_index.search(query, candidate_count)

        # 3. RRF fusion.
        fused = self._rrf_fusion([vector_results, bm25_results], k=60)

        return fused[:top_k]

    def _rrf_fusion(self, result_lists: list, k: int = 60) -> list:
        """Fuse several rankings with Reciprocal Rank Fusion.

        Args:
            result_lists: Rankings to fuse; each item must expose ``id``.
            k: RRF smoothing constant.

        Returns:
            ``(doc_id, fused_score)`` tuples sorted by descending score,
            where each list contributes ``1 / (k + rank)`` with 1-based rank.
        """
        scores = {}
        for results in result_lists:
            for position, doc in enumerate(results):
                scores[doc.id] = scores.get(doc.id, 0) + 1 / (k + position + 1)

        return sorted(scores.items(), key=lambda item: -item[1])

# 3.3 稀疏检索与密集检索
# SPLADE sparse retrieval
class SPLADERetriever:
    """Encode text into sparse term-weight vectors and rank by dot product.

    Relies on ``torch`` being importable at module level.
    """

    def __init__(self, model, tokenizer=None):
        """Store the model and its tokenizer and switch to inference mode.

        Args:
            model: Callable model whose output exposes ``logits``.
            tokenizer: Tokenizer matching ``model``; it is called with
                ``return_tensors="pt"`` and must provide ``decode``.
        """
        self.model = model
        self.tokenizer = tokenizer
        self.model.eval()

    def encode(self, text: str) -> dict:
        """Return the sparse vector (term -> weight) of ``text``.

        Example of the output shape: ``{"python": 2.5, "language": 0.9}``.

        The weight of a vocabulary entry is the maximum over token
        positions of ``log(1 + relu(logit))``. Only weights above 0.01 are
        kept.

        Args:
            text: The text to encode.

        Returns:
            A dict mapping decoded vocabulary terms to float weights.

        Raises:
            ValueError: If no tokenizer was supplied.
        """
        if self.tokenizer is None:
            raise ValueError("SPLADERetriever requires a tokenizer to encode text")

        inputs = self.tokenizer(text, return_tensors="pt", truncation=True)
        with torch.no_grad():
            # assumes logits are shaped (batch, sequence, vocabulary) — TODO confirm
            logits = self.model(**inputs).logits

        activations = torch.log1p(torch.relu(logits))

        # Zero out padded positions so they cannot win the max.
        attention_mask = inputs.get("attention_mask")
        if attention_mask is not None:
            activations = activations * attention_mask.unsqueeze(-1)

        # Pool over the sequence axis; torch.max with dim returns
        # (values, indices), so take .values, then the single batch row.
        weights = activations.max(dim=1).values[0]

        # Convert to a sparse dict keeping only meaningful weights.
        active_ids = torch.nonzero(weights > 0.01).flatten().tolist()
        return {
            self.tokenizer.decode([idx]): weights[idx].item()
            for idx in active_ids
        }

    def search(self, query: str, documents: list, top_k: int):
        """Rank ``documents`` against ``query`` by sparse dot product.

        Every document is encoded on each call.

        Args:
            query: The query text.
            documents: Document texts to score.
            top_k: Number of results to return.

        Returns:
            ``(document, score)`` tuples sorted by descending score.
        """
        query_vec = self.encode(query)

        scores = []
        for doc in documents:
            doc_vec = self.encode(doc)
            score = sum(
                query_vec.get(term, 0) * weight
                for term, weight in doc_vec.items()
            )
            scores.append((doc, score))

        return sorted(scores, key=lambda pair: -pair[1])[:top_k]

# 四、重排序与精排
4.1 交叉编码器重排
graph LR
A["Query"] --> B["粗排结果 (100)"]
B --> C["交叉编码器"]
C --> D["精排结果 (10)"]
D --> E["LLM 生成"]
class CrossEncoderReranker:
    """Re-rank candidate documents with a cross-encoder model.

    Relies on ``torch``, ``AutoModelForSequenceClassification`` and
    ``AutoTokenizer`` being importable at module level.
    """

    def __init__(self, model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"):
        """Load the cross-encoder and its tokenizer.

        Args:
            model_name: Hugging Face model id. The default is a published
                MS MARCO cross-encoder checkpoint.
        """
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_name
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Inference only: disable dropout so scores are deterministic.
        self.model.eval()

    def rerank(self, query: str, documents: list, top_k: int = 10):
        """Score every (query, document) pair jointly and keep the best.

        Args:
            query: The user query.
            documents: Candidate objects exposing a ``content`` attribute.
            top_k: Number of documents to return.

        Returns:
            The ``top_k`` highest-scoring documents, best first. An empty
            input yields an empty list.
        """
        if not documents:
            return []

        # Build the query/document pairs as two parallel lists
        # (text, text_pair).
        queries = [query] * len(documents)
        contents = [doc.content for doc in documents]

        # Batch-encode all pairs.
        inputs = self.tokenizer(
            queries,
            contents,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt",
        )

        with torch.no_grad():
            # assumes one logit per pair (single-label head) — TODO confirm
            scores = self.model(**inputs).logits.squeeze(-1)

        # Sort by descending score.
        ranked = sorted(
            zip(documents, scores.tolist()),
            key=lambda pair: -pair[1],
        )

        return [doc for doc, _ in ranked[:top_k]]

# 4.2 LLM 作为重排器
class LLMReranker:
    """Re-rank documents by asking an LLM to order them by relevance.

    ``llm`` must provide ``generate(prompt)`` returning text.
    """

    def __init__(self, llm):
        self.llm = llm

    def rerank_with_llm(self, query: str, documents: list, top_k: int = 5):
        """Return the ``top_k`` documents in the order chosen by the LLM.

        Args:
            query: The user query.
            documents: Candidate objects exposing a ``content`` attribute.
            top_k: Number of documents to return.

        Returns:
            Documents in the LLM's order, limited to ``top_k``. Numbers in
            the response that are not valid 1-based document indexes, or
            that repeat, are ignored. An empty input yields an empty list
            without calling the LLM.
        """
        if not documents:
            return []

        doc_context = "\n\n".join(
            f"文档 {number}:\n{doc.content}"
            for number, doc in enumerate(documents, start=1)
        )

        prompt = (
            "请根据以下文档与查询的相关性打分(1-10分):\n"
            f"查询:{query}\n"
            f"{doc_context}\n"
            "请按相关性从高到低排序,只返回文档编号(如:3,1,2,5,4)"
        )

        response = self.llm.generate(prompt)

        # Parse the ordering out of the free-text response.
        order = self._parse_order(response, len(documents))

        return [documents[index - 1] for index in order[:top_k]]

    def _parse_order(self, response: str, num_docs: int = None) -> list:
        """Extract the document ordering from the LLM output.

        Args:
            response: Raw LLM response text.
            num_docs: Number of candidate documents. When given, only
                indexes in ``1..num_docs`` are accepted; when ``None`` every
                number found is accepted.

        Returns:
            Distinct integer indexes in the order they appear.
        """
        import re

        order = []
        seen = set()
        for token in re.findall(r'\d+', response):
            index = int(token)
            if index in seen:
                continue
            # Reject 0 and out-of-range numbers: 0 would wrap around to the
            # last document and larger values would raise IndexError.
            if num_docs is not None and not 1 <= index <= num_docs:
                continue
            seen.add(index)
            order.append(index)
        return order

# 五、Context 压缩与摘要
5.1 Context 压缩策略
class ContextCompressor:
    """Shrink retrieved documents into a context that fits a token budget.

    The hooks ``_calc_relevance(query, item)`` and ``_count_tokens(text)``
    are called but not defined in this class; they must be supplied by a
    subclass or mixin. ``_calc_relevance`` is called both with whole
    documents and with single sentence strings.
    """

    def __init__(
        self,
        llm,
        max_tokens: int = 4000,
        min_relevance: float = 0.3,
        max_sentences: int = 5,
    ):
        """Store the LLM and the compression limits.

        Args:
            llm: Object providing ``generate(prompt)``.
            max_tokens: Token budget for the assembled context.
            min_relevance: Documents scoring below this are skipped.
            max_sentences: Number of key sentences kept per document.
        """
        self.llm = llm
        self.max_tokens = max_tokens
        self.min_relevance = min_relevance
        self.max_sentences = max_sentences

    def compress(self, query: str, documents: list) -> str:
        """Build a compressed context for ``query``.

        Strategy:
            1. Drop documents whose relevance is below ``min_relevance``.
            2. Keep only each document's most relevant sentences.
            3. When the next document would overflow the budget, replace it
               with an LLM summary sized to the remaining budget and stop.

        Args:
            query: The user query.
            documents: Candidate objects exposing a ``content`` attribute.

        Returns:
            The kept pieces joined by blank lines.
        """
        compressed = []
        current_tokens = 0

        for doc in documents:
            # Skip low-relevance documents.
            if self._calc_relevance(query, doc) < self.min_relevance:
                continue

            key_sentences = self._extract_key_sentences(doc, query)
            if not key_sentences:
                continue

            doc_tokens = self._count_tokens(key_sentences)

            if current_tokens + doc_tokens > self.max_tokens:
                # Over budget: fall back to a summary, but only if there is
                # budget left to summarise into.
                remaining = self.max_tokens - current_tokens
                if remaining > 0:
                    compressed.append(self._summarize(doc, remaining))
                break

            compressed.append(key_sentences)
            current_tokens += doc_tokens

        return "\n\n".join(compressed)

    def _extract_key_sentences(self, doc: "Document", query: str) -> str:
        """Return the sentences of ``doc`` most relevant to ``query``.

        The content is split on the full stop "。"; blank fragments (such
        as the one after a trailing full stop) are ignored.

        Args:
            doc: Object exposing a ``content`` attribute.
            query: The user query.

        Returns:
            Up to ``max_sentences`` sentences in descending relevance
            order, joined and terminated with "。", or ``""`` when the
            document has no sentences.
        """
        sentences = [s for s in doc.content.split("。") if s.strip()]
        if not sentences:
            return ""

        scored = [(s, self._calc_relevance(query, s)) for s in sentences]
        best = sorted(scored, key=lambda pair: -pair[1])[: self.max_sentences]
        return "。".join(sentence for sentence, _ in best) + "。"

    def _summarize(self, doc: "Document", max_tokens: int) -> str:
        """Ask the LLM for a summary of ``doc`` within ``max_tokens``.

        Args:
            doc: Object exposing a ``content`` attribute.
            max_tokens: Size limit quoted in the prompt.

        Returns:
            The LLM's response.
        """
        prompt = (
            f"请用不超过 {max_tokens} 个词总结以下文档的核心内容:\n"
            f"{doc.content}\n"
            "摘要:"
        )
        return self.llm.generate(prompt)

# 5.2 信息密度排序
class DensityBasedSelector:
    """Select document segments by information density.

    The hooks ``_find_relevant_segments(doc, query)`` and
    ``_count_tokens(text)`` are called but not defined in this class; they
    must be supplied by a subclass or mixin.
    """

    def select(self, query: str, documents: list, max_tokens: int) -> list:
        """Pick the most information-dense segments within a token budget.

        Information density is the length of the relevant segments divided
        by the total document length; the ranking score is that density
        multiplied by ``log(len(content) + 1)``.

        Args:
            query: The user query.
            documents: Candidate objects exposing a ``content`` attribute.
            max_tokens: Token budget for the selected segments.

        Returns:
            One string per selected document (its relevant segments joined
            by newlines), ordered by descending score. Documents that are
            empty or have no relevant segment are ignored; a document that
            does not fit the remaining budget is skipped so that smaller
            later ones can still be added.
        """
        import math

        scored_docs = []

        for doc in documents:
            content_length = len(doc.content)
            if content_length == 0:
                # Nothing to score, and the density would divide by zero.
                continue

            # Find the passages of the document related to the query.
            relevant_segments = self._find_relevant_segments(doc, query)
            if not relevant_segments:
                continue

            total_relevant = sum(len(segment) for segment in relevant_segments)
            density = total_relevant / content_length

            # Information value score.
            info_score = density * math.log(content_length + 1)

            scored_docs.append((doc, info_score, relevant_segments))

        # Highest information value first.
        scored_docs.sort(key=lambda entry: -entry[1])

        # Assemble the context.
        selected = []
        current_tokens = 0

        for _doc, _score, segments in scored_docs:
            segment_text = "\n".join(segments)
            tokens = self._count_tokens(segment_text)

            if current_tokens + tokens > max_tokens:
                continue

            selected.append(segment_text)
            current_tokens += tokens

        return selected

# 六、引用追踪与验证
6.1 引用标注系统
class CitationTracker:
    """Find ``[n]`` citation markers in an answer and check them.

    The hook ``_extract_facts(answer)`` is called but not defined in this
    class; it must be supplied by a subclass or mixin.
    """

    def __init__(self):
        self.citations = []

    def extract_citations(self, answer: str, documents: list) -> list:
        """Extract the citation markers of ``answer`` and verify each one.

        Args:
            answer: Generated answer containing markers such as ``[1]``.
            documents: Retrieved documents; marker ``[n]`` refers to
                ``documents[n - 1]``.

        Returns:
            One dict per marker, in order of appearance, with keys ``"id"``
            (the cited number), ``"document"`` (the cited document, or
            ``None`` when the number does not match any document),
            ``"valid"`` (verification result) and ``"position"`` (the
            ``(start, end)`` span of the marker in ``answer``).
        """
        import re

        # 1. Locate citation markers [1], [2], ...
        citation_pattern = r'\[(\d+)\]'

        verified_citations = []
        for match in re.finditer(citation_pattern, answer):
            doc_id = int(match.group(1))

            # 2. Resolve and verify the cited document. A number outside
            #    1..len(documents) cites nothing and is reported as
            #    invalid; in particular [0] must not wrap around to the
            #    last document.
            if 1 <= doc_id <= len(documents):
                doc = documents[doc_id - 1]
                is_valid = self._verify_citation(answer, doc)
            else:
                doc = None
                is_valid = False

            verified_citations.append({
                "id": doc_id,
                "document": doc,
                "valid": is_valid,
                "position": match.span(),
            })

        return verified_citations

    def _verify_citation(self, answer: str, doc: "Document") -> bool:
        """Check that every fact extracted from ``answer`` occurs in ``doc``.

        NOTE(review): facts are taken from the whole answer, so a document
        is valid only if it contains all of them, not just the ones next
        to its marker — confirm this strictness is intended.

        Args:
            answer: The generated answer.
            doc: Object exposing a ``content`` attribute.

        Returns:
            ``True`` when each extracted fact is a substring of
            ``doc.content`` (also when no fact is extracted).
        """
        # Key facts stated by the answer.
        answer_facts = self._extract_facts(answer)

        # Every fact must be present in the document.
        return all(fact in doc.content for fact in answer_facts)

# 6.2 自引用验证
class SelfCitationVerifier:
    """Check that an answer is grounded in the retrieved documents.

    The hook ``_extract_claims(answer)`` is called but not defined in this
    class; it must be supplied by a subclass or mixin.
    """

    def verify(self, answer: str, retrieved_docs: list) -> tuple[bool, str]:
        """Verify that every claim of ``answer`` can be traced to a document.

        Args:
            answer: The generated answer.
            retrieved_docs: Objects exposing a ``content`` attribute.

        Returns:
            ``(True, "")`` when all claims are grounded, otherwise
            ``(False, warning)`` where the warning lists the untraceable
            claims. Blank claims are ignored.
        """
        # 1. Extract the claims made by the answer.
        claims = self._extract_claims(answer)

        # 2. Keep the claims that no document supports.
        unverified_claims = [
            claim
            for claim in claims
            if claim.strip() and not self._is_grounded(claim, retrieved_docs)
        ]

        if unverified_claims:
            warning = f"警告:以下内容无法溯源:{unverified_claims}"
            return False, warning

        return True, ""

    def _is_grounded(self, claim: str, docs: list) -> bool:
        """Return whether ``claim`` occurs in any document.

        Matching is a substring test after normalisation, so differences in
        letter case, line breaks or repeated spaces do not cause a miss.

        Args:
            claim: The claim text.
            docs: Objects exposing a ``content`` attribute.
        """
        needle = self._normalize(claim)
        return any(needle in self._normalize(doc.content) for doc in docs)

    @staticmethod
    def _normalize(text: str) -> str:
        """Collapse whitespace runs to single spaces and case-fold."""
        return " ".join(text.split()).casefold()

# 七、总结
graph TB
A["RAG 优化"] --> B["Query 层"]
A --> C["检索层"]
A --> D["排序层"]
A --> E["生成层"]
B --> B1["Query 改写"]
B --> B2["HyDE"]
B --> B3["Query 扩展"]
C --> C1["向量检索"]
C --> C2["BM25"]
C --> C3["混合检索"]
D --> D1["交叉编码器"]
D --> D2["LLM 重排"]
D --> D3["密度排序"]
E --> E1["Context 压缩"]
E --> E2["引用追踪"]
E --> E3["答案验证"]
| 优化阶段 | 关键技术 | 预期收益 |
|---|---|---|
| Query 层 | 改写/HyDE/扩展 | 检索召回率提升 20-40% |
| 检索层 | 混合检索/RRF | 平衡精确性与覆盖面 |
| 排序层 | 交叉编码器/LLM 重排 | Top-K 准确率提升 30-50% |
| 生成层 | Context 压缩/引用追踪 | 幻觉减少 50%,引用准确率 90% |
支持与分享
如果这篇文章对你有帮助,欢迎支持作者或分享给更多人
部分信息可能已经过时
相关文章 智能推荐
1
向量数据库深度解析
AI 深入解析主流向量数据库的架构原理、性能对比与选型指南,涵盖 Milvus、Qdrant、Pinecone、Chroma、Weaviate。
2
RAG 检索增强生成原理
AI 深入解析 RAG 检索增强生成技术——架构原理、检索流程、Context 组装与工程实践。
3
让AI拥有知识:RAG检索增强生成详解
AI 让AI拥有知识——RAG检索增强生成详解
4
AI 工程化实践
AI AI 工程化全景导览——从提示词工程到多模态系统,梳理大模型落地的核心工程能力与知识体系
5
Embedding 与向量搜索原理
AI 深入解析文本 Embedding 模型与向量检索——模型原理、索引结构、相似度计算与工程实践。






