# final_project/src/rag_chain.py
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
from typing import Optional
import os
class RAGChain:
    """RAG chain using OpenAI API with Russian language support.

    Wraps a LangChain retriever and a ChatOpenAI model behind a fixed
    Russian-language prompt.  Two query styles are exposed:

    * ``query`` — the canonical ``RetrievalQA`` chain (retrieval and
      prompting handled by LangChain).
    * ``query_with_context`` — a manual retrieve-then-prompt flow that
      returns the retrieved context alongside the answer.
    """

    def __init__(self,
                 retriever,
                 model_name: str = "gpt-4o-mini",
                 temperature: float = 0.3,
                 api_key: Optional[str] = None):
        """
        Initialize RAG chain.

        Args:
            retriever: LangChain retriever (from vector store)
            model_name: OpenAI model name
            temperature: Temperature for LLM
            api_key: OpenAI API key; falls back to the OPENAI_API_KEY
                environment variable when omitted
        """
        self.llm = ChatOpenAI(
            model_name=model_name,
            temperature=temperature,
            api_key=api_key or os.getenv("OPENAI_API_KEY"),
            max_tokens=1024
        )
        self.retriever = retriever

        # Custom prompt for Russian-language answers.  The template text is
        # model-facing input and must stay exactly as written.
        self.prompt_template = PromptTemplate(
            template="""Вы - полезный ассистент, специализирующийся на анализе документов.
Используя следующий контекст из документов, ответьте на вопрос.
Контекст:
{context}
Вопрос: {question}
Инструкции:
1. Ответьте только на основе информации из контекста
2. Если информация не найдена в контексте, скажите "Информация не найдена в документах"
3. Ответьте на русском языке
4. Будьте кратким и точным
5. Цитируйте источники если возможно
Ответ:""",
            input_variables=["context", "question"]
        )

        # Create RetrievalQA chain.  "stuff" = concatenate every retrieved
        # chunk into a single context block for one LLM call.
        self.chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.retriever,
            return_source_documents=True,
            chain_type_kwargs={"prompt": self.prompt_template}
        )

    def query(self, question: str) -> dict:
        """
        Query the RAG chain.

        Args:
            question: User question (can be in any language)

        Returns:
            Dictionary with ``answer`` (str) and ``sources`` (list of dicts
            with a 200-char content preview and the document metadata).
            On failure the error text is returned in ``answer`` and
            ``sources`` is empty — callers get a dict either way.
        """
        try:
            result = self.chain.invoke({"query": question})
            return {
                "answer": result.get("result", ""),
                "sources": [
                    {
                        "content": doc.page_content[:200],  # preview only
                        "metadata": doc.metadata
                    }
                    for doc in result.get("source_documents", [])
                ]
            }
        except Exception as e:
            # Boundary handler: surface the error as an answer string so the
            # UI layer never has to deal with a raised exception.
            return {
                "answer": f"Ошибка при обработке запроса: {str(e)}",
                "sources": []
            }

    def query_with_context(self, question: str, context_limit: int = 5) -> dict:
        """
        Query with explicit context retrieval.

        Args:
            question: User question
            context_limit: Maximum number of context chunks to use

        Returns:
            Dictionary with ``answer`` and ``context_documents`` (300-char
            content previews plus metadata for each chunk actually used).
        """
        # BUGFIX: get_relevant_documents() takes no per-call search_kwargs —
        # that is a retriever constructor option.  The original passed
        # search_kwargs={"k": context_limit} here, so the limit was ignored
        # (or raised TypeError, depending on retriever type).  Enforce the
        # limit by slicing the retrieved list instead.
        relevant_docs = self.retriever.get_relevant_documents(question)[:context_limit]

        # Format context: one "Источник: <metadata>" header per chunk.
        context = "\n\n".join([
            f"Источник: {doc.metadata}\n{doc.page_content}"
            for doc in relevant_docs
        ])

        # Fill the shared prompt template and call the model directly.
        prompt = self.prompt_template.format(context=context, question=question)
        response = self.llm.invoke(prompt)

        return {
            "answer": response.content,
            "context_documents": [
                {
                    "content": doc.page_content[:300],
                    "metadata": doc.metadata
                }
                for doc in relevant_docs
            ]
        }