# chatAPI / rag_api.py
# jimytech's picture
# Update rag_api.py
# 9e0e903 verified
import os
import requests
import shutil
from langchain_community.vectorstores import FAISS
from fastapi import FastAPI
from pydantic import BaseModel
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
# 1. ENVIRONMENT VARIABLES AND CACHE
# Hugging Face Spaces only guarantees write access under /tmp, so every
# model/embedding cache is redirected there before any HF library loads.
TEMP_CACHE_DIR = '/tmp/huggingface_cache'
os.environ['TRANSFORMERS_CACHE'] = TEMP_CACHE_DIR
os.environ['HF_HOME'] = TEMP_CACHE_DIR
os.environ['SENTENCE_TRANSFORMERS_HOME'] = TEMP_CACHE_DIR
os.makedirs(TEMP_CACHE_DIR, exist_ok=True)
# 2. PATH CONFIGURATION
# Google Drive direct-download links for the prebuilt FAISS index files.
URL_FAISS = "https://drive.google.com/uc?export=download&id=1hiVycS4DQHO1MBdC-L_z1TXA6sJO_Y-r"
URL_PKL = "https://drive.google.com/uc?export=download&id=1vbG8unx88Kb5jn7puGv1gqSM4S6rIUQC"
DOWNLOAD_DIR = "/tmp/db_faiss"
DB_FAISS_PATH = DOWNLOAD_DIR  # FAISS.load_local reads index.faiss/index.pkl from here
# 3. PROMPTS
# Rewrites a follow-up question into a standalone one using the chat history.
CONDENSE_PROMPT = PromptTemplate(
template="""Dada la conversación y la pregunta, reescríbela para que sea independiente y clara sobre la UPT Aragua.
Historial: {chat_history}
Pregunta: {question}
Pregunta reescrita:""",
input_variables=["chat_history", "question"]
)
# Classifies the message into SALUDO (greeting) / UNIVERSIDAD / OTRO (other).
INTENT_PROMPT = PromptTemplate(
template="Categoriza el mensaje en: SALUDO, UNIVERSIDAD u OTRO. Responde solo la palabra. Mensaje: {query}",
input_variables=["query"]
)
# Friendly greeting response when intent is SALUDO.
SALUDO_PROMPT = PromptTemplate(
template="Eres UPTA bot. Saluda cordialmente. Mensaje: {query}",
input_variables=["query"]
)
# Final answer prompt: the model must answer strictly from retrieved context.
RAG_PROMPT = PromptTemplate(
template="""Eres UPTA bot. Responde usando solo el contexto.
Contexto: {context}
Pregunta: {question}
Respuesta:""",
input_variables=["context", "question"]
)
# 4. MODELS
class QueryRequest(BaseModel):
    """Request body for POST /query: the user message plus prior chat turns."""
    query: str
    # History entries are indexed as m[0]=user, m[1]=bot in process_query,
    # so each item is presumably a (user, bot) pair — pydantic copies this
    # mutable default per instance, so sharing is not an issue here.
    history: list = []
def download_file(url, local_path):
    """Stream *url* into *local_path*, creating parent directories as needed.

    Fixes over the original:
    - ``raise_for_status()``: a 403/404 HTML page from Google Drive would
      previously be written to disk silently, corrupting the FAISS index.
    - ``iter_content`` instead of ``shutil.copyfileobj(r.raw, ...)``: the raw
      socket stream skips transfer decoding (gzip/chunked), which can yield a
      garbled file; ``iter_content`` decodes correctly.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        requests.RequestException: on connection/timeout failures.
    """
    headers = {'User-Agent': 'Mozilla/5.0'}
    os.makedirs(os.path.dirname(local_path), exist_ok=True)
    with requests.get(url, stream=True, headers=headers, timeout=30) as r:
        r.raise_for_status()  # fail fast instead of saving an error page
        with open(local_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
                f.write(chunk)
def load_rag():
    """Download the FAISS index, then build embeddings, LLM chains and retriever.

    Returns a 5-tuple in this exact order (unpacked by ``startup``):
        (condense_chain, intent_chain, saludo_chain, rag_chain, retriever)
    """
    # Fetch the two files FAISS.load_local expects in DB_FAISS_PATH.
    download_file(URL_FAISS, os.path.join(DOWNLOAD_DIR, 'index.faiss'))
    download_file(URL_PKL, os.path.join(DOWNLOAD_DIR, 'index.pkl'))
    embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},
    cache_folder=TEMP_CACHE_DIR
    )
    # allow_dangerous_deserialization is required because index.pkl is a pickle;
    # safe only because we control the index files being downloaded.
    vectorstore = FAISS.load_local(DB_FAISS_PATH, embeddings, allow_dangerous_deserialization=True)
    # GROQ_API_KEY must be set in the Hugging Face Space secrets.
    llm = ChatGroq(temperature=0.1, model_name="openai/gpt-oss-120b")
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    return (
    CONDENSE_PROMPT | llm,
    INTENT_PROMPT | llm,
    SALUDO_PROMPT | llm,
    ( {"context": retriever, "question": RunnablePassthrough()} | RAG_PROMPT | llm ),
    retriever
    )
# 5. API INITIALIZATION
app = FastAPI()
# Chains/retriever are populated once by the startup hook below.
condense_c, intent_c, saludo_c, rag_c, retriever = (None, None, None, None, None)
@app.on_event("startup")
async def startup():
    """Build the RAG chains once at process start and publish them as globals."""
    global condense_c, intent_c, saludo_c, rag_c, retriever
    condense_c, intent_c, saludo_c, rag_c, retriever = load_rag()
@app.get("/")
def root():
    """Health-check endpoint: reports that the service is up."""
    return dict(status="ready")
@app.post("/query")
async def process_query(request: QueryRequest):
    """Answer a user query, routing by classified intent.

    Flow: condense the question against chat history (if any) → classify
    intent → greet, refuse off-topic, or run the RAG chain with sources.
    Always returns a JSON dict; errors are reported as {"error": ...} rather
    than raised, so the client receives a body instead of a 500.

    Fixes over the original: the second ``except Exception`` clause was
    unreachable dead code and is removed; source deduplication now uses
    ``dict.fromkeys`` so the order is deterministic (retrieval order) instead
    of arbitrary ``set`` order.
    """
    try:
        # Flatten (user, bot) history pairs into a plain-text transcript.
        chat_str = "\n".join(f"U: {m[0]}\nB: {m[1]}" for m in request.history)

        # Rewrite the question to be self-contained when history exists.
        q_final = request.query
        if request.history:
            res_c = condense_c.invoke({"chat_history": chat_str, "question": request.query})
            q_final = res_c.content.strip()

        # Intent classification: SALUDO / UNIVERSIDAD / OTRO.
        res_i = intent_c.invoke({"query": q_final})
        intent = res_i.content.upper()

        if "SALUDO" in intent:
            res_s = saludo_c.invoke({"query": request.query})
            return {"response": res_s.content, "intent": "SALUDO"}
        elif "OTRO" in intent:
            return {"response": "Solo respondo sobre la UPT Aragua.", "intent": "OTRO"}
        else:
            # RAG answer plus the deduplicated list of source documents.
            res_r = rag_c.invoke(q_final)
            docs = retriever.invoke(q_final)
            # dict.fromkeys dedupes while preserving retrieval order.
            sources = list(dict.fromkeys(d.metadata.get("source", "N/A") for d in docs))
            return {
                "response": res_r.content,
                "intent": "UNIVERSIDAD",
                "sources": sources,
                "contextual_query": q_final
            }
    except Exception as e:
        return {"error": str(e)}