mobile wallpaper 1mobile wallpaper 2mobile wallpaper 3mobile wallpaper 4
322 字
1 分钟
RAG 优化策略深度解析
2025-04-24

一、RAG 优化概述#

1.1 为什么需要 RAG 优化#

```mermaid
graph TB
    subgraph "基础 RAG 问题"
        A["检索质量差"]
        B["Context 冗余"]
        C["引用不准确"]
        D["幻觉依然存在"]
    end
    subgraph "优化方向"
        E["Query 改写"]
        F["混合检索"]
        G["智能重排"]
        H["Context 压缩"]
    end
    A --> E
    B --> F
    C --> G
    D --> H
```
| 问题类型 | 典型表现 | 优化策略 |
| --- | --- | --- |
| 检索不到 | 关键词不匹配语义 | Query 改写/扩展 |
| 检索太多 | 返回大量无关 Context | 混合检索 + 重排序 |
| Context 过长 | 超过模型上下文限制 | Context 压缩/摘要 |
| 引用不准 | 答案与引用不匹配 | 引用追踪与验证 |

1.2 优化架构总览#

```mermaid
flowchart LR
    A["用户 Query"] --> B["Query 改写"]
    B --> C["混合检索"]
    C --> D["粗排召回"]
    D --> E["精排重排序"]
    E --> F["Context 组装"]
    F --> G["LLM 生成"]
    G --> H["引用验证"]
    H --> I["最终输出"]
```

二、Query 改写与扩展#

2.1 Query 改写策略#

class QueryRewriter:
    """Rewrite a user query into several variants with the help of an LLM.

    The ``llm`` object must expose ``batch_generate(prompts)``. Its result is
    consumed as a list of strings (assumption derived from how
    ``_merge_rewrite`` uses it -- confirm against the real client).
    """

    def __init__(self, llm):
        self.llm = llm

    def rewrite(self, query: str) -> str:
        """Return the original query joined with its LLM-generated rewrites.

        Two prompts are sent in a single batch: one asking for an expanded,
        fully spelled-out form of the query and one asking for synonym
        phrasings.

        Args:
            query: The raw user query.

        Returns:
            All distinct variants joined with ``" | "``, original query first.
        """
        prompts = [
            # Expand abbreviations and implicit concepts.
            f"将以下查询展开为完整表述:{query}",
            # Produce synonym variants.
            f"提供查询的同义词表达:{query}",
        ]
        expanded = self.llm.batch_generate(prompts)
        return self._merge_rewrite(query, expanded)

    def _merge_rewrite(self, original: str, rewrites: list) -> str:
        """Merge the original query and its rewrites into one string.

        Args:
            original: The query as typed by the user.
            rewrites: Rewritten variants; duplicates are allowed.

        Returns:
            The distinct variants joined with ``" | "``.
        """
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # original query always leads and the output is reproducible. A set
        # would reorder the variants arbitrarily between runs.
        variants = dict.fromkeys([original, *rewrites])
        return " | ".join(variants)

2.2 HyDE 假设文档#

# HyDE (Hypothetical Document Embeddings)
class HyDERetriever:
    """Retrieve with HyDE (Hypothetical Document Embeddings).

    An LLM first drafts a hypothetical answer; that answer and the original
    query are both used for similarity search, and the two rankings are
    merged with Reciprocal Rank Fusion (RRF).
    """

    def __init__(self, llm, vector_store):
        self.llm = llm
        self.vector_store = vector_store

    def retrieve(self, query: str, top_k: int = 5):
        """Return the fused top-``top_k`` hits for ``query``.

        Args:
            query: The user query.
            top_k: Number of hits requested from each search and kept after
                fusion.

        Returns:
            A list of ``(doc_id, rrf_score)`` tuples, best first.
        """
        # 1. Let the LLM write a hypothetical answer.
        hypothetical_doc = self.llm.generate(
            f"假设你是专家,请给出以下问题的详细答案:\n{query}"
        )
        # 2. Search with the hypothetical answer instead of the raw query.
        hyde_results = self.vector_store.similarity_search(hypothetical_doc, top_k)
        # 3. Also search with the original query.
        original_results = self.vector_store.similarity_search(query, top_k)
        # 4. Fuse both rankings. top_k is passed explicitly because the
        #    helper has no other way to know the requested cut-off.
        return self._fusion_results(hyde_results, original_results, top_k=top_k)

    def _fusion_results(self, hyde_results, original_results, k=60, top_k=None):
        """Merge two rankings with Reciprocal Rank Fusion.

        Each document contributes ``1 / (k + rank)`` (rank starting at 1)
        for every list it appears in.

        Args:
            hyde_results: Ranked hits from the hypothetical-answer search;
                each item must expose ``.id``.
            original_results: Ranked hits from the raw-query search.
            k: RRF smoothing constant.
            top_k: Maximum number of fused hits to return; ``None`` returns
                all of them.

        Returns:
            A list of ``(doc_id, rrf_score)`` tuples sorted by descending
            score.
        """
        scores = {}
        for results in (hyde_results, original_results):
            for rank, doc in enumerate(results):
                scores[doc.id] = scores.get(doc.id, 0) + 1 / (k + rank + 1)
        fused = sorted(scores.items(), key=lambda item: -item[1])
        return fused if top_k is None else fused[:top_k]

2.3 Query 扩展技术#

class QueryExpander:
    """Generate alternative phrasings of a query from several angles.

    ``_extract_core_terms``, ``_get_hyponyms`` and ``_get_hypernyms`` are
    used here but are not part of this snippet; they must be supplied by the
    surrounding code.
    """

    def __init__(self, embedder=None, vector_index=None):
        """Store the collaborators that ``_get_synonyms`` depends on.

        Args:
            embedder: Object exposing ``encode(list_of_terms)``.
            vector_index: Object exposing ``search(vector, top_k=...)``.
        """
        self.embedder = embedder
        self.vector_index = vector_index

    def expand(self, query: str) -> list[str]:
        """Return distinct expansions of ``query`` in generation order.

        Args:
            query: The user query.

        Returns:
            Expanded queries: synonym substitutions first, then hyponym
            suffixes, then hypernym substitutions. Duplicates are dropped.
        """
        expansions = []
        core_terms = self._extract_core_terms(query)

        # 1. Replace each core term with each of its synonyms.
        for term in core_terms:
            for syn in self._get_synonyms(term):
                expansions.append(query.replace(term, syn))

        # 2. Hyponyms narrow the query: append them to the original text.
        for hypo in self._get_hyponyms(core_terms):
            expansions.append(f"{query} {hypo}")

        # 3. Hypernyms generalise the query by replacing the first core
        #    term. Guarded because there is nothing to replace when no core
        #    term was extracted (indexing [0] would raise IndexError).
        if core_terms:
            head_term = core_terms[0]
            for hyper in self._get_hypernyms(core_terms):
                expansions.append(query.replace(head_term, hyper))

        # Order-preserving de-duplication keeps the result reproducible.
        return list(dict.fromkeys(expansions))

    def _get_synonyms(self, term: str) -> list:
        """Look up terms similar to ``term`` through the vector index.

        Returns:
            Up to five search hits, excluding any hit equal to ``term``.
            The element type is whatever ``vector_index.search`` yields.
        """
        term_vec = self.embedder.encode([term])
        similar = self.vector_index.search(term_vec, top_k=5)
        return [s for s in similar if s != term]

三、混合检索策略#

3.1 混合检索架构#

```mermaid
graph TB
    A["Query"] --> B["向量检索"]
    A --> C["BM25 检索"]
    A --> D["稀疏检索"]
    A --> E["密集检索"]
    B --> F["向量结果集"]
    C --> G["BM25 结果集"]
    D --> H["稀疏结果集"]
    E --> I["密集结果集"]
    F --> J["结果融合"]
    G --> J
    H --> J
    I --> J
    J --> K["RRF 融合"]
    K --> L["Top-K 召回"]
```

3.2 BM25 + 向量混合#

class HybridRetriever:
    """Combine vector search and BM25 search with Reciprocal Rank Fusion."""

    def __init__(self, vector_store, bm25_index, embed=None):
        """Store the two indexes and the optional query-embedding callable.

        Args:
            vector_store: Object exposing ``search(query_vector, limit)``.
            bm25_index: Object exposing ``search(query_text, limit)``.
            embed: Callable mapping query text to the vector expected by
                ``vector_store``. May be omitted when a subclass overrides
                :meth:`embed`.
        """
        self.vector_store = vector_store
        self.bm25_index = bm25_index
        self._embed_fn = embed

    def embed(self, text: str):
        """Turn ``text`` into a query vector.

        Raises:
            NotImplementedError: If no ``embed`` callable was injected and
                the method was not overridden.
        """
        if self._embed_fn is None:
            raise NotImplementedError(
                "HybridRetriever needs an `embed` callable or an overridden embed()"
            )
        return self._embed_fn(text)

    def retrieve(self, query: str, top_k: int = 10):
        """Return the fused top-``top_k`` hits for ``query``.

        Returns:
            A list of ``(doc_id, rrf_score)`` tuples, best first.
        """
        # Over-fetch from each retriever (2 x top_k) so fusion has candidates
        # that only one of the two rankings surfaced.
        candidate_count = top_k * 2
        vector_results = self.vector_store.search(self.embed(query), candidate_count)
        bm25_results = self.bm25_index.search(query, candidate_count)
        fused = self._rrf_fusion([vector_results, bm25_results], k=60)
        return fused[:top_k]

    def _rrf_fusion(self, result_lists: list, k: int = 60) -> list:
        """Merge ranked lists with Reciprocal Rank Fusion.

        Args:
            result_lists: Ranked result lists; each item must expose ``.id``.
            k: RRF smoothing constant.

        Returns:
            ``(doc_id, score)`` tuples sorted by descending score, where the
            score is the sum of ``1 / (k + rank)`` over all lists (rank
            starting at 1).
        """
        scores = {}
        for results in result_lists:
            for rank, doc in enumerate(results):
                scores[doc.id] = scores.get(doc.id, 0) + 1 / (k + rank + 1)
        return sorted(scores.items(), key=lambda item: -item[1])

3.3 稀疏检索与密集检索#

# SPLADE 稀疏检索
class SPLADERetriever:
    """Sparse retrieval with a SPLADE-style masked-language model."""

    def __init__(self, model, tokenizer=None):
        """Store the model and tokenizer and switch the model to eval mode.

        Args:
            model: Callable model whose output exposes ``.logits``.
            tokenizer: Callable tokenizer that also exposes ``decode(ids)``.
                Required before :meth:`encode` is used.
        """
        self.model = model
        self.tokenizer = tokenizer
        self.model.eval()

    def encode(self, text: str) -> dict:
        """Encode ``text`` as a sparse ``{term: weight}`` mapping.

        Example output: ``{"python": 2.5, "编程": 1.8, "语言": 0.9}``.

        Raises:
            ValueError: If no tokenizer has been provided.
        """
        if self.tokenizer is None:
            raise ValueError("SPLADERetriever.encode requires a tokenizer")

        # The model needs tensors, not plain Python lists.
        inputs = self.tokenizer(text, return_tensors="pt")
        with torch.no_grad():
            outputs = self.model(**inputs)

        # assumes logits are (batch, seq_len, vocab_size) -- TODO confirm
        activations = torch.log(1 + torch.relu(outputs.logits))
        # Max-pool over the sequence axis to get one weight per vocabulary
        # entry. torch.max with `dim` returns (values, indices); only the
        # values are wanted. A single un-padded text is encoded per call, so
        # no attention-mask weighting is applied.
        term_weights = activations.max(dim=1).values[0]

        # Keep only entries above the sparsity threshold.
        sparse_vec = {}
        for idx in torch.nonzero(term_weights > 0.01).flatten().tolist():
            sparse_vec[self.tokenizer.decode([idx])] = term_weights[idx].item()
        return sparse_vec

    def search(self, query: str, documents: list, top_k: int):
        """Rank ``documents`` by sparse dot product with ``query``.

        Args:
            query: Query text.
            documents: Document texts; each one is encoded on every call.
            top_k: Number of results to keep.

        Returns:
            ``(document, score)`` tuples sorted by descending score.
        """
        query_vec = self.encode(query)
        scored = []
        for doc in documents:
            doc_vec = self.encode(doc)
            score = sum(
                query_vec.get(term, 0) * weight
                for term, weight in doc_vec.items()
            )
            scored.append((doc, score))
        return sorted(scored, key=lambda pair: -pair[1])[:top_k]

四、重排序与精排#

4.1 交叉编码器重排#

```mermaid
graph LR
    A["Query"] --> B["粗排结果 (100)"]
    B --> C["交叉编码器"]
    C --> D["精排结果 (10)"]
    D --> E["LLM 生成"]
```
class CrossEncoderReranker:
    """Re-rank candidate documents with a cross-encoder model."""

    # NOTE(review): the default model id below may not resolve to a published
    # checkpoint -- confirm the exact repository name before relying on it.
    def __init__(self, model_name: str = "cross-encoder/ms-marco"):
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_name
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def rerank(self, query: str, documents: list, top_k: int = 10):
        """Score every (query, document) pair and keep the best ``top_k``.

        Args:
            query: The user query.
            documents: Candidates exposing ``.content``.
            top_k: Number of documents to return.

        Returns:
            The ``top_k`` highest-scoring documents, best first. An empty
            candidate list yields an empty list.
        """
        # Nothing to score; avoid handing an empty batch to the tokenizer.
        if not documents:
            return []

        # Build one (query, text) pair per candidate and encode as a batch.
        pairs = [(query, doc.content) for doc in documents]
        inputs = self.tokenizer(
            pairs,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt",
        )
        with torch.no_grad():
            # assumes one relevance logit per pair, i.e. logits of shape
            # (n_pairs, 1) -- TODO confirm for the chosen checkpoint
            scores = self.model(**inputs).logits.squeeze(-1)

        ranked = sorted(
            zip(documents, scores.tolist()),
            key=lambda pair: -pair[1],
        )
        return [doc for doc, _ in ranked[:top_k]]

4.2 LLM 作为重排器#

class LLMReranker:
    """Re-rank candidate documents by asking an LLM for an ordering."""

    def __init__(self, llm):
        self.llm = llm

    def rerank_with_llm(self, query: str, documents: list, top_k: int = 5):
        """Return up to ``top_k`` documents in the order chosen by the LLM.

        Args:
            query: The user query.
            documents: Candidates exposing ``.content``.
            top_k: Maximum number of documents to return.

        Returns:
            Documents in LLM-preferred order. Numbers in the reply that do
            not refer to a supplied document, or that repeat, are ignored.
        """
        doc_context = "\n\n".join(
            f"文档 {i + 1}:\n{doc.content}"
            for i, doc in enumerate(documents)
        )
        prompt = (
            "请根据以下文档与查询的相关性打分(1-10分):\n"
            f"查询:{query}\n"
            f"{doc_context}\n"
            "请按相关性从高到低排序,只返回文档编号(如:3,1,2,5,4)\n"
        )
        response = self.llm.generate(prompt)
        # The parser needs the candidate count to reject invalid numbers.
        order = self._parse_order(response, len(documents))
        return [documents[i - 1] for i in order[:top_k]]

    def _parse_order(self, response: str, num_documents=None) -> list:
        """Extract a list of 1-based document numbers from the LLM reply.

        Args:
            response: Raw LLM output.
            num_documents: Number of candidates shown to the LLM. When given,
                numbers above it are discarded.

        Returns:
            Distinct document numbers (>= 1) in the order they appear.
        """
        import re

        order = []
        seen = set()
        for token in re.findall(r'\d+', response):
            index = int(token)
            # 0 would index the last document via documents[-1]; repeats
            # would return the same document twice.
            if index < 1 or index in seen:
                continue
            if num_documents is not None and index > num_documents:
                continue
            seen.add(index)
            order.append(index)
        return order

五、Context 压缩与摘要#

5.1 Context 压缩策略#

class ContextCompressor:
    """Trim retrieved documents to a token budget before generation.

    ``_calc_relevance`` and ``_count_tokens`` are used here but are not part
    of this snippet; they must be supplied by the surrounding code.
    """

    def __init__(self, llm, max_tokens: int = 4000):
        self.llm = llm
        self.max_tokens = max_tokens

    def compress(self, query: str, documents: list) -> str:
        """Build a compressed context string for ``query``.

        Documents are processed in order: low-relevance documents are
        skipped, the key sentences of each remaining document are kept while
        they fit in the budget, and the first document that does not fit is
        replaced by an LLM summary sized to the remaining budget.

        Returns:
            The kept pieces joined by blank lines.
        """
        compressed = []
        current_tokens = 0
        for doc in documents:
            # Skip documents scoring below the relevance threshold.
            if self._calc_relevance(query, doc) < 0.3:
                continue

            key_sentences = self._extract_key_sentences(doc, query)
            if not key_sentences:
                continue
            doc_tokens = self._count_tokens(key_sentences)

            if current_tokens + doc_tokens > self.max_tokens:
                # Over budget: fall back to a summary, but only when there is
                # room left -- asking for a 0-token summary wastes an LLM
                # call and still overshoots the budget.
                remaining = self.max_tokens - current_tokens
                if remaining > 0:
                    compressed.append(self._summarize(doc, remaining))
                break

            compressed.append(key_sentences)
            current_tokens += doc_tokens
        return "\n\n".join(compressed)

    def _extract_key_sentences(self, doc: "Document", query: str) -> str:
        """Return the (up to) five sentences of ``doc`` most relevant to ``query``.

        Sentences are split on the Chinese full stop "。" and returned in
        descending relevance order, each terminated by "。". An empty string
        is returned when the document has no non-blank sentence.
        """
        # Splitting text that ends with "。" leaves an empty trailing piece;
        # drop blanks so they cannot be selected and emit a stray "。".
        sentences = [s for s in doc.content.split("。") if s.strip()]
        if not sentences:
            return ""
        scored = [(sent, self._calc_relevance(query, sent)) for sent in sentences]
        top_sentences = sorted(scored, key=lambda pair: -pair[1])[:5]
        return "。".join(sent for sent, _ in top_sentences) + "。"

    def _summarize(self, doc: "Document", max_tokens: int) -> str:
        """Ask the LLM to summarise ``doc`` within ``max_tokens`` words."""
        prompt = (
            f"请用不超过 {max_tokens} 个词总结以下文档的核心内容:\n"
            f"{doc.content}\n"
            "摘要:"
        )
        return self.llm.generate(prompt)

5.2 信息密度排序#

class DensityBasedSelector:
    """Select context by information density.

    ``_find_relevant_segments`` and ``_count_tokens`` are used here but are
    not part of this snippet; they must be supplied by the surrounding code.
    """

    def select(self, query: str, documents: list, max_tokens: int) -> list:
        """Pick relevant segments from the densest documents within a budget.

        Information density is ``relevant length / total length``; the
        ranking score multiplies it by ``log(total length + 1)``.

        Args:
            query: The user query.
            documents: Candidates exposing ``.content``.
            max_tokens: Token budget for the selected text.

        Returns:
            One string per selected document (its relevant segments joined
            by newlines), highest score first. Documents that would exceed
            the budget are skipped so smaller ones can still fit.
        """
        scored_docs = []
        for doc in documents:
            content_length = len(doc.content)
            # An empty document has no density; dividing by its length
            # would raise ZeroDivisionError.
            if content_length == 0:
                continue

            relevant_segments = self._find_relevant_segments(doc, query)
            # No relevant text means nothing worth adding to the context.
            if not relevant_segments:
                continue

            total_relevant = sum(len(seg) for seg in relevant_segments)
            density = total_relevant / content_length
            info_score = density * math.log(content_length + 1)
            scored_docs.append((info_score, relevant_segments))

        scored_docs.sort(key=lambda entry: -entry[0])

        # Greedy fill in score order.
        selected = []
        current_tokens = 0
        for _, segments in scored_docs:
            segment_text = "\n".join(segments)
            tokens = self._count_tokens(segment_text)
            if current_tokens + tokens > max_tokens:
                continue
            selected.append(segment_text)
            current_tokens += tokens
        return selected

六、引用追踪与验证#

6.1 引用标注系统#

class CitationTracker:
    """Extract ``[n]`` citation markers from an answer and verify them.

    ``_extract_facts`` is used here but is not part of this snippet; it must
    be supplied by the surrounding code.
    """

    def __init__(self):
        self.citations = []

    def extract_citations(self, answer: str, documents: list) -> list:
        """Find every ``[n]`` marker in ``answer`` and check it.

        Args:
            answer: Generated answer text containing 1-based markers such as
                ``[1]`` or ``[2]``.
            documents: Retrieved documents, in the order they were numbered.

        Returns:
            One dict per marker, in order of appearance, with keys ``"id"``,
            ``"document"``, ``"valid"`` and ``"position"`` (the marker's
            ``(start, end)`` span). A marker that does not refer to an
            existing document is reported with ``document=None`` and
            ``valid=False``.
        """
        citation_pattern = r'\[(\d+)\]'
        verified_citations = []
        for match in re.finditer(citation_pattern, answer):
            doc_id = int(match.group(1))
            if 1 <= doc_id <= len(documents):
                doc = documents[doc_id - 1]
                is_valid = self._verify_citation(answer, doc)
            else:
                # [0] would silently hit documents[-1] and larger numbers
                # would raise IndexError; report both as invalid citations.
                doc = None
                is_valid = False
            verified_citations.append({
                "id": doc_id,
                "document": doc,
                "valid": is_valid,
                "position": match.span(),
            })
        return verified_citations

    def _verify_citation(self, answer: str, doc: "Document") -> bool:
        """Return True when every fact extracted from ``answer`` is in ``doc``.

        A fact counts as present only when it is a literal substring of
        ``doc.content``.
        """
        # NOTE(review): facts come from the whole answer, not from the
        # sentence carrying the marker, so an answer citing several sources
        # can only validate against a document containing all of its facts
        # -- confirm this is intended.
        answer_facts = self._extract_facts(answer)
        return all(fact in doc.content for fact in answer_facts)

6.2 自引用验证#

class SelfCitationVerifier:
    """Check that an answer relies only on the retrieved documents.

    ``_extract_claims`` is used here but is not part of this snippet; it
    must be supplied by the surrounding code.
    """

    def verify(self, answer: str, retrieved_docs: list) -> tuple[bool, str]:
        """Verify that every claim in ``answer`` can be traced to a document.

        Args:
            answer: Generated answer text.
            retrieved_docs: Documents exposing ``.content``.

        Returns:
            ``(True, "")`` when all claims are grounded, otherwise
            ``(False, warning)`` where ``warning`` lists the ungrounded
            claims.
        """
        claims = self._extract_claims(answer)
        unverified_claims = [
            claim
            for claim in claims
            if not self._is_grounded(claim, retrieved_docs)
        ]
        if unverified_claims:
            warning = f"警告:以下内容无法溯源:{unverified_claims}"
            return False, warning
        return True, ""

    def _is_grounded(self, claim: str, docs: list) -> bool:
        """Return True when ``claim`` is a literal substring of some document."""
        return any(claim in doc.content for doc in docs)

七、总结#

```mermaid
graph TB
    A["RAG 优化"] --> B["Query 层"]
    A --> C["检索层"]
    A --> D["排序层"]
    A --> E["生成层"]
    B --> B1["Query 改写"]
    B --> B2["HyDE"]
    B --> B3["Query 扩展"]
    C --> C1["向量检索"]
    C --> C2["BM25"]
    C --> C3["混合检索"]
    D --> D1["交叉编码器"]
    D --> D2["LLM 重排"]
    D --> D3["密度排序"]
    E --> E1["Context 压缩"]
    E --> E2["引用追踪"]
    E --> E3["答案验证"]
```
| 优化阶段 | 关键技术 | 预期收益 |
| --- | --- | --- |
| Query 层 | 改写/HyDE/扩展 | 检索召回率提升 20-40% |
| 检索层 | 混合检索/RRF | 平衡精确性与覆盖面 |
| 排序层 | 交叉编码器/LLM 重排 | Top-K 准确率提升 30-50% |
| 生成层 | Context 压缩/引用追踪 | 幻觉减少 50%,引用准确率 90% |

支持与分享

如果这篇文章对你有帮助,欢迎支持作者或分享给更多人

RAG 优化策略深度解析
https://blog.souloss.com/posts/ai-engineering/rag-optimization/
作者
Souloss
发布于
2025-04-24
许可协议
CC BY-NC-SA 4.0

部分信息可能已经过时