mobile wallpaper 1mobile wallpaper 2mobile wallpaper 3mobile wallpaper 4
443 字
1 分钟
Agent 构建方法论深度解析
2025-06-01

一、Agent 概述#

1.1 什么是 LLM Agent#

```mermaid
graph TB
    subgraph "Agent 核心组件"
        A["LLM 大脑"]
        B["Tool Use 工具调用"]
        C["Planning 规划"]
        D["Memory 记忆"]
    end
    subgraph "Agent 能力"
        E["自主决策"]
        F["多步推理"]
        G["环境交互"]
        H["自我改进"]
    end
    A --> B
    A --> C
    A --> D
    B --> E
    C --> F
    D --> G
    E --> H
```
| 组件 | 作用 | 关键技术 |
| --- | --- | --- |
| LLM | 推理与决策 | GPT-4, Claude, 开源模型 |
| Tool Use | 扩展能力边界 | Function Calling, API 调用 |
| Planning | 分解任务、制定执行计划 | CoT, ReAct, Tree of Thought |
| Memory | 存储上下文与历史经验 | Vector DB, KV Store |

1.2 Agent vs 普通 LLM 调用#

# Plain LLM call: one prompt in, one completion out.
def 普通调用(user_input):
    """Return the model's direct completion for ``user_input``.

    No planning, tool use or memory is involved. ``llm`` is not defined in
    this snippet and is expected to be provided by the surrounding code.
    """
    return llm.generate(user_input)
# Agent call: plan first, then execute the plan step by step.
def agent调用(user_input):
    """Plan the task, run every step, and return the plan's final answer.

    Each step is either delegated to a tool (``execute_tool``) or reasoned
    about by the model (``llm.reason``). ``llm`` and ``execute_tool`` are
    expected to be provided by the surrounding code.
    """
    # 1. Understand the task and break it into steps.
    plan = llm.plan(user_input)

    # 2. Execute the plan.
    for step in plan.steps:
        if step.needs_tool:
            result = execute_tool(step.tool, step.args)
        else:
            result = llm.reason(step.task)
        # Record the outcome of every step, not only tool calls; otherwise
        # the result of a pure reasoning step is computed and then lost.
        plan.add_context(result)

    # 3. Return the result.
    return plan.final_answer

二、Tool Use 工具调用#

2.1 Tool Use 架构#

```mermaid
graph LR
    A["用户 Query"] --> B["LLM 判断"]
    B --> C{"需要调用工具?"}
    C -->|是| D["选择工具"]
    D --> E["生成参数"]
    E --> F["执行工具"]
    F --> G["解析结果"]
    G --> H["继续推理"]
    C -->|否| I["直接回答"]
```

2.2 Function Calling 实现#

from openai import OpenAI
client = OpenAI()
# Tool schemas passed to the chat-completions call as ``tools=``.
# Each entry declares one callable function and a JSON-Schema description
# of its arguments.
tools = [
    {
        "type": "function",
        "function": {
            "name": "search_code",
            "description": "搜索代码库中的代码",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "搜索关键词"},
                    "language": {"type": "string", "description": "编程语言"},
                },
                "required": ["query"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "read_file",
            "description": "读取文件内容",
            "parameters": {
                "type": "object",
                "properties": {
                    "path": {"type": "string", "description": "文件路径"},
                    "line_start": {"type": "integer", "description": "起始行号"},
                    "line_end": {"type": "integer", "description": "结束行号"},
                },
                "required": ["path"],
            },
        },
    },
]
# Agent execution loop
def agent_execute(query, max_turns=10):
    """Run a tool-calling conversation until the model gives a final answer.

    Args:
        query: The user's request, sent as the first message.
        max_turns: Upper bound on model round-trips, so a model that keeps
            requesting tools cannot loop forever.

    Returns:
        The content of the first assistant message that requests no tools.

    Raises:
        RuntimeError: If no final answer is produced within ``max_turns``.
    """
    # Local import: the snippet's import block does not bring in ``json``.
    import json

    messages = [{"role": "user", "content": query}]
    for _ in range(max_turns):
        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=messages,
            tools=tools,
            tool_choice="auto",
        )
        message = response.choices[0].message
        if not message.tool_calls:
            # The model answered directly.
            return message.content

        # The assistant turn that carries the tool calls must be in the
        # history before the matching "tool" messages; the API rejects tool
        # results that do not follow the request that produced them.
        messages.append(message)
        for tool_call in message.tool_calls:
            tool_args = json.loads(tool_call.function.arguments)
            result = execute_tool(tool_call.function.name, tool_args)
            if not isinstance(result, str):
                # Tool message content must be text.
                result = json.dumps(result, ensure_ascii=False)
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": result,
            })

    raise RuntimeError(f"No final answer after {max_turns} turns")
def execute_tool(name, args):
    """Dispatch a tool call by name and return the tool's result.

    Args:
        name: Tool name as requested by the model.
        args: Keyword arguments for the tool, already decoded from JSON.

    Raises:
        ValueError: If ``name`` is not a known tool. Without this, an
            unknown name would silently yield ``None``.
    """
    if name == "search_code":
        return search_code(**args)
    if name == "read_file":
        return read_file(**args)
    raise ValueError(f"Unknown tool: {name}")

2.3 工具设计原则#

# Examples of well-designed tools
good_tools = [
    {
        "name": "get_weather",
        "description": "获取指定城市的天气预报",
        "parameters": {
            "city": {"type": "string", "description": "城市名称"},
            "date": {"type": "string", "description": "日期,格式 YYYY-MM-DD"},
        },
        # Inputs and outputs are described concretely.
    },
    {
        "name": "search_database",
        "description": "在数据库中搜索用户信息",
        "parameters": {
            "table": {"type": "string", "description": "表名"},
            "conditions": {"type": "object", "description": "查询条件"},
        },
        # The parameter structure is clear.
    },
]
# Examples of poorly designed tools
bad_tools = [
    # The description is too vague to tell when the tool applies.
    {"name": "query", "description": "执行查询"},
    # The name carries no meaning.
    {"name": "do_something", "description": "做某事"},
]

三、ReAct 推理架构#

3.1 ReAct 核心思想#

```mermaid
graph TB
    A["Thought"] --> B["Action"]
    B --> C["Observation"]
    C --> D{"完成?"}
    D -->|否| A
    D -->|是| E["Final Answer"]
    style A fill:#90EE90
    style B fill:#FFD700
    style C fill:#87CEEB
```

ReAct = Reasoning + Acting:交替进行推理和行动,直到任务完成。

class ReActAgent:
    """Agent that alternates reasoning (Thought) and tool use (Action).

    Args:
        llm: Object exposing ``think(task=..., history=..., tools=...)``.
        tools: Iterable of tool objects with a ``name`` attribute and an
            ``execute(**kwargs)`` method.
        max_iterations: Maximum number of Thought/Action rounds per task.
    """

    def __init__(self, llm, tools, max_iterations=10):
        self.llm = llm
        self.tools = tools
        self.max_iterations = max_iterations

    def run(self, task: str) -> str:
        """Run the ReAct main loop and return the answer for ``task``.

        Returns the fixed "not completed" message when the iteration budget
        is used up without the model finishing.
        """
        observations = []
        for _ in range(self.max_iterations):
            # 1. Thought: the model inspects the history and picks a next step.
            thought = self.llm.think(
                task=task,
                history=observations,
                tools=self.tools,
            )
            # Finished, or answering directly without a tool: stop here.
            if thought.is_finished or not thought.needs_tool:
                return thought.answer

            # 2. Action: run the requested tool and record what came back.
            action_result = self.execute_tool(thought.tool_name, thought.tool_args)
            observations.append({
                "thought": thought,
                "action": thought.action,
                "observation": action_result,
            })
        return "任务未在限制次数内完成"

    def execute_tool(self, tool_name, args):
        """Execute the tool called ``tool_name`` with keyword arguments ``args``.

        Raises:
            ValueError: If no registered tool has that name.
        """
        tool = next((t for t in self.tools if t.name == tool_name), None)
        if tool is None:
            raise ValueError(f"Unknown tool: {tool_name}")
        return tool.execute(**args)

3.2 ReAct Prompt 模板#

# Prompt template for a ReAct-style agent. It contains three named
# placeholders: {tool_schemas}, {task} and {history}.
# NOTE(review): presumably filled with str.format before being sent to the
# model; no call site is visible in this snippet.
REACT_PROMPT = """你是一个 AI Agent,需要完成用户任务。
可用工具:
{tool_schemas}
执行流程:
1. Thought: 分析当前情况,决定是否需要调用工具
2. Action: 如果需要,调用工具并传入参数
3. Observation: 观察工具返回结果
4. 重复直到任务完成
当前任务:{task}
{history}
请开始执行:
"""
# ReAct with history
def format_history(observations):
    """Render past ReAct rounds as text for the prompt's history section.

    Args:
        observations: Iterable of dicts with the keys ``"thought"``,
            ``"action"`` and ``"observation"``.

    Returns:
        One numbered block per round (numbering starts at 1), each followed
        by a blank line; the empty string when there are no observations.
    """
    # Build with join rather than repeated ``+=`` on a growing string.
    return "".join(
        f"步骤 {index}:\n"
        f"思考:{obs['thought']}\n"
        f"行动:{obs['action']}\n"
        f"观察:{obs['observation']}\n\n"
        for index, obs in enumerate(observations, start=1)
    )

3.3 Plan-and-Execute 模式#

class PlanAndExecuteAgent:
    """
    Plan-and-execute pattern:
    1. Generate the complete plan first.
    2. Then execute the plan's steps one by one.

    NOTE(review): ``execute_step`` calls ``self.search`` and
    ``self.read_file``, which are not defined in this snippet; they must be
    supplied (for example by a subclass) before steps mentioning "search"
    or "read" can run.
    """

    def __init__(self, llm=None):
        # ``llm`` must expose ``generate(prompt)`` and ``reason(step, context)``.
        # It defaults to None so that argument-less construction still works.
        self.llm = llm

    def plan(self, task: str) -> list:
        """Ask the model to break ``task`` into steps and return them as a list."""
        prompt = (
            "分析以下任务,分解为具体执行步骤:\n"
            f"任务:{task}\n"
            "请列出执行步骤(每步一个具体行动):\n"
        )
        steps = self.llm.generate(prompt)
        return self.parse_steps(steps)

    def parse_steps(self, steps) -> list:
        """Turn the model's plan into a list of non-empty step strings.

        Text is split per line, and leading list markers ("1.", "2)", "-",
        "*", ...) are removed. A non-string iterable is treated as already
        split into steps.
        """
        import re

        if not isinstance(steps, str):
            cleaned = (str(item).strip() for item in steps)
            return [item for item in cleaned if item]

        marker = re.compile(r"^\s*(?:\d+\s*[.)、.)]|[-*•])\s*")
        parsed = []
        for line in steps.splitlines():
            text = marker.sub("", line).strip()
            if text:
                parsed.append(text)
        return parsed

    def execute(self, task: str):
        """Plan ``task``, run each step in order, and return step -> result.

        Results are keyed by the step text, so two steps with identical
        wording share one entry (the later result wins).
        """
        # 1. Generate the plan.
        plan = self.plan(task)
        # 2. Execute step by step, feeding earlier results to later steps.
        context = {}
        for i, step in enumerate(plan):
            print(f"执行步骤 {i+1}: {step}")
            context[step] = self.execute_step(step, context)
        return context

    def execute_step(self, step, context):
        """Execute one step, routed by keyword in the step text."""
        if "search" in step:
            return self.search(context)
        if "read" in step:
            return self.read_file(context)
        return self.llm.reason(step, context)

四、Planning 规划能力#

4.1 Tree of Thought#

```mermaid
graph TB
    A["问题"] --> B["方案 A"]
    A --> C["方案 B"]
    A --> D["方案 C"]
    B --> B1["步骤 A1"]
    B --> B2["步骤 A2"]
    C --> C1["步骤 B1"]
    C --> C2["步骤 B2"]
    D --> D1["步骤 C1"]
    D --> D2["步骤 C2"]
    B1 --> BE["评估 A"]
    B2 --> BE
    C1 --> CE["评估 B"]
    C2 --> CE
    D1 --> DE["评估 C"]
    D2 --> DE
    BE --> FINAL["选择最优方案"]
    CE --> FINAL
    DE --> FINAL
```
class ThoughtNode:
    """One node of the thought tree: a candidate solution linked to its parent.

    Defined here because the snippet uses ``ThoughtNode`` without defining
    it. The root node carries only the problem (``solution`` is None).
    """

    def __init__(self, problem, solution=None, depth=0, parent=None):
        self.problem = problem
        self.solution = solution
        self.depth = depth
        self.parent = parent

    @property
    def path(self) -> str:
        """Solutions from the root down to this node, joined with " -> "."""
        steps = []
        node = self
        while node is not None:
            if node.solution is not None:
                steps.append(str(node.solution))
            node = node.parent
        return " -> ".join(reversed(steps))


class TreeOfThought:
    """Breadth-first Tree-of-Thought search over model-proposed solutions.

    Args:
        llm: Object exposing ``generate(prompt)``.
        num_thoughts: Maximum number of candidates expanded per node.
        max_depth: Depth at which nodes are scored instead of expanded.

    The search is exhaustive: up to ``num_thoughts ** max_depth`` leaves are
    generated and scored, so keep both values small.
    """

    def __init__(self, llm, num_thoughts=3, max_depth=3):
        self.llm = llm
        self.num_thoughts = num_thoughts
        self.max_depth = max_depth

    def solve(self, problem: str) -> str:
        """Explore the tree and return the path of the best-scoring leaf."""
        from collections import deque

        # deque gives O(1) pops from the front of the BFS queue.
        queue = deque([ThoughtNode(problem, depth=0)])
        best_solution = None
        best_score = -1
        while queue:
            node = queue.popleft()
            candidates = []
            if node.depth < self.max_depth:
                candidates = self.generate_candidates(node)
            if candidates:
                queue.extend(candidates)
                continue
            # A leaf: either at max depth, or the model proposed nothing.
            # Scoring such nodes guarantees at least one node is evaluated.
            score = self.evaluate(node)
            if score > best_score:
                best_score = score
                best_solution = node
        return best_solution.path

    def generate_candidates(self, node) -> list:
        """Ask the model for follow-up proposals and wrap them as child nodes."""
        import re

        prompt = (
            f"当前问题:{node.problem}\n"
            f"当前方案:{node.solution}\n"
            f"请提出 {self.num_thoughts} 种不同的改进方案或下一步行动:\n"
        )
        raw = self.llm.generate(prompt)
        # Text is split per line; iterating the string directly would yield
        # single characters. Other iterables are taken as ready-made lists.
        lines = raw.splitlines() if isinstance(raw, str) else list(raw)
        marker = re.compile(r"^\s*(?:\d+\s*[.)、.)]|[-*•])\s*")
        proposals = []
        for line in lines:
            text = marker.sub("", str(line)).strip()
            if text:
                proposals.append(text)
        limit = max(self.num_thoughts, 0)
        return [
            ThoughtNode(node.problem, proposal, depth=node.depth + 1, parent=node)
            for proposal in proposals[:limit]
        ]

    def evaluate(self, node) -> float:
        """Score a node between 0 and 10 using the model.

        The first number in the model's reply is taken as the score; a reply
        without any number counts as 0. The result is clamped to [0, 10].
        """
        import re

        prompt = (
            "评估以下方案的质量(0-10分):\n"
            f"方案:{node.solution}\n"
            "评分标准:\n"
            "- 可行性(0-3分)\n"
            "- 完整性(0-3分)\n"
            "- 创新性(0-4分)\n"
            "评分:"
        )
        raw = self.llm.generate(prompt)
        if isinstance(raw, (int, float)):
            score = float(raw)
        else:
            match = re.search(r"\d+(?:\.\d+)?", str(raw))
            score = float(match.group()) if match else 0.0
        return max(0.0, min(10.0, score))

4.2 Self-Discovery 架构#

class SelfDiscoverAgent:
    """
    Self-Discover: the agent works out the best strategy for a task itself.

    NOTE(review): ``discover`` calls ``select_relevant``, ``compose_plan``
    and ``execute``, none of which is defined in this snippet; they must be
    supplied (for example by a subclass) before ``discover`` can run.
    """

    def __init__(self, llm):
        # ``llm`` must expose ``generate(prompt)``.
        self.llm = llm

    def discover(self, task: str) -> str:
        """List strategies, keep the relevant ones, compose a plan, run it."""
        # 1. List the possible strategies.
        strategies = self.list_strategies(task)
        # 2. Select the relevant strategies.
        relevant = self.select_relevant(strategies, task)
        # 3. Combine them into an execution plan.
        plan = self.compose_plan(relevant, task)
        # 4. Execute.
        return self.execute(plan)

    def list_strategies(self, task):
        """Ask the model to enumerate candidate strategies for ``task``.

        Returns whatever ``llm.generate`` returns for the strategy prompt.
        """
        prompt = (
            "对于以下任务,列出所有可能的解决策略:\n"
            f"任务:{task}\n"
            "策略类型:\n"
            "1. 分解为子问题\n"
            "2. 类比推理\n"
            "3. 逐步验证\n"
            "4. 反向思考\n"
            "5. 广度优先 vs 深度优先\n"
        )
        return self.llm.generate(prompt)

五、Memory 记忆系统#

5.1 记忆类型#

```mermaid
graph TB
    subgraph "记忆层次"
        A["长期记忆"]
        B["短期记忆"]
        C["工作记忆"]
    end
    subgraph "技术实现"
        A --> D["Vector DB"]
        B --> E["KV Store"]
        C --> F["Context Window"]
    end
    subgraph "作用"
        D --> G["经验知识"]
        E --> H["会话历史"]
        F --> I["当前推理"]
    end
```
| 记忆类型 | 容量 | 持久性 | 用途 |
| --- | --- | --- | --- |
| 工作记忆 | ~128K tokens | 当前会话 | 当前推理上下文 |
| 短期记忆 | 数十条对话 | 当前会话 | 会话历史 |
| 长期记忆 | 近乎无限 | 持久化 | 跨会话知识与经验 |

5.2 记忆实现#

class AgentMemory:
    """Three-tier agent memory: short-term turns, long-term vectors, scratch.

    Args:
        llm: Object exposing ``generate(prompt)``; needed only by
            ``summarize_long_term``.
        embedder: Callable mapping text to a vector; needed by
            ``add_experience`` and ``retrieve``.
        long_term: Vector store exposing ``add``, ``search`` and ``get_all``.
            When omitted a ``VectorStore()`` is created; that class is not
            defined in this snippet and must be provided by the project.
    """

    def __init__(self, llm=None, embedder=None, long_term=None):
        # Local import: the snippet has no import block providing ``deque``.
        from collections import deque

        self.llm = llm
        self.embedder = embedder
        # Short-term memory: the most recent turns; older ones fall off.
        self.short_term = deque(maxlen=20)
        # Long-term memory: vector database.
        self.long_term = VectorStore() if long_term is None else long_term
        # Working memory: the current context.
        self.working_memory = {}

    def embed(self, text: str):
        """Return the embedding vector for ``text`` using the embedder.

        Raises:
            NotImplementedError: If no embedder was configured.
        """
        if self.embedder is None:
            raise NotImplementedError("AgentMemory needs an embedder to embed text")
        return self.embedder(text)

    def add_turn(self, role: str, content: str):
        """Append one conversation turn to short-term memory."""
        self.short_term.append({"role": role, "content": content})

    def add_experience(self, experience: dict):
        """Store an experience in long-term memory.

        ``experience`` must contain ``"id"`` and ``"content"``; ``"metadata"``
        is optional and defaults to an empty dict.
        """
        vector = self.embed(experience["content"])
        self.long_term.add(
            id=experience["id"],
            vector=vector,
            content=experience["content"],
            metadata=experience.get("metadata", {}),
        )

    def retrieve(self, query: str, top_k: int = 3) -> list:
        """Return the ``top_k`` long-term memories found for ``query``."""
        return self.long_term.search(self.embed(query), top_k)

    def summarize_long_term(self):
        """Ask the model to summarise all long-term memories into key patterns.

        Raises:
            RuntimeError: If no ``llm`` was configured.
        """
        if self.llm is None:
            raise RuntimeError("AgentMemory needs an llm to summarise memories")
        all_memories = self.long_term.get_all()
        prompt = (
            "总结以下经验,提取关键模式:\n"
            f"{all_memories}\n"
            "总结:\n"
        )
        return self.llm.generate(prompt)

5.3 记忆增强检索#

class MemoryAugmentedRetrieval:
    """Retrieval that merges a base retriever's hits with agent memory hits.

    Args:
        memory: Object exposing ``retrieve(query, top_k=...)``.
        retriever: Object exposing ``search(query, top_k=...)``.
        memory_weight: Factor applied to the rank score of memory hits
            relative to base hits (base hits weigh 1.0).
    """

    def __init__(self, memory, retriever, memory_weight=0.5):
        self.memory = memory
        self.retriever = retriever
        self.memory_weight = memory_weight

    def retrieve_with_memory(self, query: str, context: dict) -> list:
        """Retrieve for ``query`` using both sources, then fuse and rerank."""
        # 1. Base retrieval.
        base_results = self.retriever.search(query, top_k=5)
        # 2. Retrieval of related memories.
        memory_results = self.memory.retrieve(query, top_k=3)
        # 3. Fuse both result lists.
        fused = self._fusion_results(base_results, memory_results)
        # 4. Rerank.
        return self._rerank(query, fused, context)

    def _fusion_results(self, base, memory):
        """Fuse two ranked lists by reciprocal rank.

        Every item contributes ``1 / rank`` (rank starts at 1) under its
        ``id``; memory items are additionally scaled by ``memory_weight``.

        Returns:
            ``(id, score)`` pairs sorted by descending score; ties keep
            first-seen order.
        """
        scores = {}
        for weight, ranked in ((1, base), (self.memory_weight, memory)):
            for rank, item in enumerate(ranked, start=1):
                scores[item.id] = scores.get(item.id, 0) + 1 / rank * weight
        return sorted(scores.items(), key=lambda pair: -pair[1])

    def _rerank(self, query, fused, context):
        """Default reranker: keep the fusion order unchanged.

        Provided so the pipeline runs end to end; override it to reorder
        ``fused`` using ``query`` and ``context``.
        """
        return fused

六、主流框架对比#

6.1 框架特性对比#

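| 框架 | 开发方 | 核心优势 | 学习曲线 |
| --- | --- | --- | --- |
| LangChain | LangChain AI | 生态全面,组件丰富 | 中等 |
| LlamaIndex | LlamaIndex | 数据连接强,RAG 友好 | 中等 |
| AutoGPT | Significant-Gravitas | 自主性强,任务分解好 | 较高 |
| MetaGPT | DeepWisdom | 多 Agent 协作,SOP 驱动 | 较高 |
| CrewAI | CrewAI | 多 Agent 编排,角色清晰 | 较低 |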

6.2 LangChain Agent 示例#

from langchain.agents import AgentExecutor, create_react_agent
from langchain.tools import Tool
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate

# Define the tools. ``search_code`` and ``read_file`` are expected to be
# provided by the surrounding project.
tools = [
    Tool(
        name="search",
        func=search_code,
        description="搜索代码库中的代码",
    ),
    Tool(
        name="read",
        func=read_file,
        description="读取文件内容",
    ),
]

# ``create_react_agent`` requires a prompt template exposing the variables
# ``tools``, ``tool_names`` and ``agent_scratchpad`` (plus the ``input`` key
# passed to ``invoke``); a plain string with other placeholders is rejected.
# The English keywords (Thought / Action / Action Input / Final Answer) are
# what the ReAct output parser looks for, so they must stay untranslated.
react_prompt = PromptTemplate.from_template(
    """尽你所能完成用户任务。你可以使用以下工具:

{tools}

请严格使用以下格式:

Question: 需要完成的任务
Thought: 分析当前情况,决定下一步
Action: 要调用的工具,必须是 [{tool_names}] 之一
Action Input: 传给工具的输入
Observation: 工具返回的结果
...(Thought/Action/Action Input/Observation 可以重复多次)
Thought: 我已经知道最终答案
Final Answer: 对原始任务的最终回答

开始!

Question: {input}
Thought:{agent_scratchpad}"""
)

# Create the agent
llm = ChatOpenAI(model="gpt-4-turbo")
agent = create_react_agent(llm, tools, prompt=react_prompt)

# Run it
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
result = agent_executor.invoke({"input": "查找处理用户认证的代码"})

6.3 CrewAI 示例#

from crewai import Agent, Task, Crew
# Define the agents (roles). ``search_tool``, ``browse_tool`` and
# ``write_tool`` are expected to be provided by the surrounding project.
researcher = Agent(
    role="研究员",
    goal="收集相关信息",
    backstory="你是一个专业的研究员,擅长信息收集",
    tools=[search_tool, browse_tool],
)
writer = Agent(
    role="作家",
    goal="撰写报告",
    backstory="你是一个专业的技术作家",
    tools=[write_tool],
)

# Define the tasks. ``expected_output`` is a required Task field describing
# what a finished task looks like; omitting it fails validation.
research_task = Task(
    description="研究 AI Agent 的最新发展",
    expected_output="一份关于 AI Agent 最新发展的要点清单",
    agent=researcher,
)
write_task = Task(
    description="撰写研究报告",
    expected_output="一篇结构完整的研究报告",
    agent=writer,
    context=[research_task],  # depends on the research task's output
)

# Create the crew and run it
crew = Crew(agents=[researcher, writer], tasks=[research_task, write_task])
result = crew.kickoff()

七、总结#

```mermaid
graph TB
    A["LLM Agent"] --> B["Tool Use"]
    A --> C["Planning"]
    A --> D["Memory"]
    B --> B1["Function Calling"]
    B --> B2["API Integration"]
    C --> C1["ReAct"]
    C --> C2["Tree of Thought"]
    D --> D1["Short-term"]
    D --> D2["Long-term"]
    A --> E["框架选择"]
    E --> E1["LangChain: 全面"]
    E --> E2["LlamaIndex: 数据"]
    E --> E3["CrewAI: 多Agent"]
```
| 能力 | 实现方式 | 关键考虑 |
| --- | --- | --- |
| 工具调用 | Function Calling / Tool | 工具设计要清晰、幂等 |
| 推理规划 | ReAct / ToT | 根据任务复杂度选择 |
| 记忆系统 | Vector DB + KV Store | 平衡检索精度与速度 |
| 多 Agent | CrewAI / MetaGPT | 定义清晰的角色与协作流程 |

支持与分享

如果这篇文章对你有帮助,欢迎支持作者或分享给更多人

Agent 构建方法论深度解析
https://blog.souloss.com/posts/ai-engineering/agent-building/
作者
Souloss
发布于
2025-06-01
许可协议
CC BY-NC-SA 4.0

部分信息可能已经过时