Skip to content

LangChain实战

概述

LangChain是一个强大的框架,用于构建基于大语言模型(LLM)的应用程序。它提供了一套工具和抽象,帮助开发者快速构建复杂的AI应用,如聊天机器人、文档问答系统、智能代理等。

LangChain的核心优势:

  • 模块化设计 - 组件可自由组合和扩展
  • 多模型支持 - 支持OpenAI、Claude、国产大模型等
  • 丰富的工具 - 内置大量工具和集成
  • 生产就绪 - 提供完整的开发和部署方案

核心概念

Chain(链)

Chain是LangChain的核心概念,用于将多个组件串联起来执行复杂任务。

python
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Each stage is a Runnable; the | operator composes them left-to-right.
chat_model = ChatOpenAI(model="gpt-4o-mini")
translate_prompt = ChatPromptTemplate.from_template("翻译以下文本为英文:{text}")

# prompt -> model -> parser: render the prompt, call the LLM, extract plain text.
chain = translate_prompt | chat_model | StrOutputParser()

print(chain.invoke({"text": "你好,世界"}))

Prompt模板

Prompt模板用于动态生成提示词。

python
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Two-message template: the system turn is parameterized by {role},
# the user turn by {question}.
prompt = ChatPromptTemplate.from_messages([
    ("system", "你是一个专业的{role},请用专业但易懂的方式回答问题。"),
    ("user", "{question}"),
])

# .format() renders the whole template into a single prompt string.
formatted = prompt.format(role="数据科学家", question="什么是过拟合?")

Memory(记忆)

Memory用于保存对话历史和上下文信息。

python
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# In-memory map of session_id -> ChatMessageHistory. A production app
# would back this with a persistent store (Redis, a database, ...).
store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the message history for *session_id*, creating it on first use."""
    history = store.get(session_id)
    if history is None:
        history = ChatMessageHistory()
        store[session_id] = history
    return history

# Wrap the chain so every invocation reads and appends to its session history.
chain_with_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
)

核心功能

LCEL(LangChain Expression Language)

LCEL是LangChain的声明式语言,用于组合组件。

python
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

llm = ChatOpenAI(model="gpt-4o-mini")
key_points_prompt = ChatPromptTemplate.from_template("{topic}的三个关键点是什么?")

# LCEL pipe syntax: the output of each stage feeds the next.
chain = key_points_prompt | llm | StrOutputParser()

result = chain.invoke({"topic": "机器学习"})

工具调用

LangChain支持工具调用,让模型能够执行实际操作。

python
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI

@tool
def multiply(a: int, b: int) -> int:
    """Multiply two numbers."""
    return a * b

@tool
def add(a: int, b: int) -> int:
    """Add two numbers."""
    return a + b

# Expose both tools; the model decides which one to call and with what args.
tools = [multiply, add]
llm = ChatOpenAI(model="gpt-4o-mini")
llm_with_tools = llm.bind_tools(tools)

# The response carries structured tool_calls — nothing is executed yet.
response = llm_with_tools.invoke("计算 3 * 5 + 2")
print(response.tool_calls)

RAG实现

使用LangChain构建RAG应用。

python
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA

# Load the raw knowledge file into Document objects.
loader = TextLoader("documents/knowledge.txt")
documents = loader.load()

# Split into overlapping chunks so each retrieved unit stays small enough
# to fit in the prompt while the overlap preserves cross-chunk context.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
texts = text_splitter.split_documents(documents)

# Embed the chunks and index them in an in-memory Chroma vector store.
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(texts, embeddings)

# Retrieve the 3 most similar chunks per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

llm = ChatOpenAI(model="gpt-4o-mini")
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # concatenate all retrieved chunks into one prompt
    retriever=retriever,
    return_source_documents=True
)

# FIX: calling the chain directly — qa_chain({"query": ...}) — is the
# deprecated Chain.__call__ style; the current Runnable interface is .invoke().
result = qa_chain.invoke({"query": "什么是机器学习?"})
print(result["result"])

使用示例

示例1:构建智能对话机器人

python
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

chat_model = ChatOpenAI(model="gpt-4o-mini", temperature=0.7)

# System persona + running conversation history + the latest user turn.
prompt = ChatPromptTemplate.from_messages([
    ("system", "你是一个友好的AI助手,名字叫小智。请用简洁、准确的方式回答问题。"),
    MessagesPlaceholder(variable_name="history"),
    ("user", "{input}"),
])

chain = prompt | chat_model | StrOutputParser()

# One ChatMessageHistory per session id, created lazily.
session_store = {}

def get_session_history(session_id: str):
    """Fetch (or lazily create) the message history for one session."""
    if session_id not in session_store:
        session_store[session_id] = ChatMessageHistory()
    return session_store[session_id]

chain_with_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="history",
)

# Simple REPL: type 退出 / exit / quit to stop.
while True:
    user_text = input("你: ")
    if user_text.lower() in ("退出", "exit", "quit"):
        break

    reply = chain_with_history.invoke(
        {"input": user_text},
        config={"configurable": {"session_id": "user_001"}},
    )
    print(f"小智: {reply}\n")

示例2:文档问答系统

python
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# 1. Load the PDF and chunk it for retrieval.
docs = PyPDFLoader("documents/report.pdf").load()
splits = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
).split_documents(docs)

# 2. Index the chunks in a FAISS vector store and expose it as a retriever.
vectorstore = FAISS.from_documents(splits, OpenAIEmbeddings())
retriever = vectorstore.as_retriever()

llm = ChatOpenAI(model="gpt-4o-mini")

system_prompt = """你是一个专业的文档助手。请根据以下上下文回答问题:
{context}"""

# {context} receives the stuffed document chunks; {input} the user question.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("user", "{input}"),
])

# 3. Stuff retrieved chunks into the prompt and answer the question.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

response = rag_chain.invoke({"input": "这份报告的主要结论是什么?"})
print(response["answer"])

示例3:Agent开发

python
from langchain_openai import ChatOpenAI
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain_core.tools import tool
from langchain_core.prompts import ChatPromptTemplate

@tool
def get_weather(city: str) -> str:
    """获取指定城市的天气信息"""
    # Canned demo data; a real tool would query a weather API here.
    weather_data = {
        "北京": "晴天,温度15-25°C",
        "上海": "多云,温度18-26°C",
        "深圳": "小雨,温度22-28°C"
    }
    return weather_data.get(city, f"未找到{city}的天气信息")

@tool
def calculate(expression: str) -> str:
    """计算数学表达式"""
    # SECURITY FIX: the original used eval(), which executes arbitrary
    # Python supplied by the model/user. Walk the arithmetic AST instead,
    # allowing only numeric literals and basic operators.
    import ast
    import operator

    allowed_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
        ast.USub: operator.neg,
        ast.UAdd: operator.pos,
    }

    def _eval_node(node):
        # Recursively evaluate only whitelisted node types.
        if isinstance(node, ast.Expression):
            return _eval_node(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in allowed_ops:
            return allowed_ops[type(node.op)](_eval_node(node.left), _eval_node(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in allowed_ops:
            return allowed_ops[type(node.op)](_eval_node(node.operand))
        raise ValueError("仅支持基本算术表达式")

    try:
        result = _eval_node(ast.parse(expression, mode="eval"))
        return f"计算结果:{result}"
    except Exception as e:
        return f"计算错误:{str(e)}"

tools = [get_weather, calculate]
llm = ChatOpenAI(model="gpt-4o-mini")

# The prompt must expose {agent_scratchpad} so the agent can record
# intermediate tool calls and their observations between turns.
prompt = ChatPromptTemplate.from_messages([
    ("system", "你是一个智能助手,可以使用工具帮助用户。"),
    ("user", "{input}"),
    ("placeholder", "{agent_scratchpad}")
])

agent = create_tool_calling_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

result = agent_executor.invoke({
    "input": "北京今天的天气怎么样?如果温度是20度,那么20*2是多少?"
})
print(result["output"])

最佳实践

1. 模块化设计

将复杂功能拆分为独立的Chain和组件。

python
def create_translation_chain():
    """Build a chain that translates {text} into {language}."""
    # FIX: the original template "翻译为{language}{text}" ran the target
    # language and the text together; add a delimiter so the rendered
    # prompt is unambiguous.
    prompt = ChatPromptTemplate.from_template("翻译为{language}:{text}")
    return prompt | ChatOpenAI() | StrOutputParser()

def create_summary_chain():
    """Build a chain that summarizes {text}."""
    prompt = ChatPromptTemplate.from_template("总结以下内容:{text}")
    return prompt | ChatOpenAI() | StrOutputParser()

translation_chain = create_translation_chain()
summary_chain = create_summary_chain()

2. 错误处理

添加完善的错误处理机制。

python
from langchain_core.runnables import RunnableLambda

def safe_invoke(chain, input_data):
    """Run *chain* on *input_data*, folding any failure into a status dict.

    Returns {"success": True, "result": ...} on success, or
    {"success": False, "error": str(exc)} when invocation raises.
    """
    try:
        return {"success": True, "result": chain.invoke(input_data)}
    except Exception as e:
        return {"success": False, "error": str(e)}

chain_with_error_handling = RunnableLambda(safe_invoke)

3. 成本控制

使用回调函数监控Token使用。

python
from langchain_community.callbacks import get_openai_callback

# Everything invoked inside this context manager is metered by the callback.
with get_openai_callback() as usage:
    result = chain.invoke({"input": "你好"})
    print(f"总Token数:{usage.total_tokens}")
    print(f"总成本:${usage.total_cost}")

小结

LangChain是一个功能强大的LLM应用开发框架,通过本章节的学习,你应该掌握了:

  1. 核心概念 - Chain、Prompt模板、Memory等基础组件
  2. LCEL语法 - 使用管道符组合组件的声明式语法
  3. 工具调用 - 让模型能够执行实际操作
  4. RAG实现 - 构建文档问答系统
  5. Agent开发 - 创建能够自主决策的智能代理

LangChain的优势在于其模块化和可扩展性,适合快速构建生产级AI应用。建议在实际项目中多实践,逐步掌握高级功能和最佳实践。