Spaces:
No application file
No application file
Upload 4 files
Browse files- app.py +40 -0
- documents.json +7 -0
- rag_engine.py +72 -0
- requirements.txt +8 -0
app.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from rag_engine import recuperar_documentos, generar_respuesta, preguntar
|
| 3 |
+
|
| 4 |
+
def ask(query, top_k, umbral):
    """Answer a question from the UI and expose the context that was used.

    Args:
        query: Question text entered by the user.
        top_k: Maximum number of documents to retrieve.
        umbral: Minimum similarity a document must reach to be kept.

    Returns:
        A ``(answer, context)`` pair of strings. ``context`` is the retrieved
        documents joined by a ``---`` separator. When no document passes the
        threshold, ``answer`` is a fixed fallback message and ``context`` is
        empty.
    """
    docs_recuperados = recuperar_documentos(query, top_k, umbral)

    # Without supporting documents the model would be prompted with an empty
    # context and could only make something up; return the same fallback
    # message that rag_engine.preguntar() uses instead of calling the model.
    if not docs_recuperados:
        return (
            "I'm sorry, I couldn't find relevant information in the knowledge base.",
            "",
        )

    respuesta = generar_respuesta(query, docs_recuperados)

    # Separate the documents visually in the "Retrieved Context" textbox.
    docs_formateados = "\n\n---\n\n".join(docs_recuperados)

    return respuesta, docs_formateados
|
| 15 |
+
|
| 16 |
+
# Interface definition: inputs on the left, results on the right.
with gr.Blocks(title="RAG Hospital System") as demo:
    gr.Markdown("# 🏥 Hospital Q&A System (RAG)")
    gr.Markdown("Ask questions about contact details, hours, and services.")

    with gr.Row():
        # Left column: the question plus the two retrieval controls.
        with gr.Column():
            input_text = gr.Textbox(
                label="Question",
                placeholder="E.g., What are the working hours?",
                lines=2,
            )
            slider_k = gr.Slider(
                minimum=1,
                maximum=5,
                value=5,
                step=1,
                label="Top K Documents",
            )
            slider_threshold = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.55,
                step=0.05,
                label="Similarity Threshold",
            )
            btn = gr.Button("Send")

        # Right column: generated answer and the documents it was based on.
        with gr.Column():
            output_answer = gr.Textbox(label="Generated Answer", lines=3)
            output_docs = gr.Textbox(
                label="Retrieved Context",
                lines=6,
                max_lines=15,
            )

    # Clicking "Send" runs ask() with the three inputs and fills both outputs.
    btn.click(
        ask,
        inputs=[input_text, slider_k, slider_threshold],
        outputs=[output_answer, output_docs],
    )

if __name__ == "__main__":
    demo.launch()
|
documents.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"doc1": "Hospital contact details: You can contact the hospital at email testing@gmail.com, phone +911234567890, or visit us at xyz, abc, 1234, Nepal.",
|
| 3 |
+
"doc2": "Hospital's working hours: The hospital's working hours are 7:00 AM - 8:00 PM daily.",
|
| 4 |
+
"doc3": "Official email address: The official email address to contact the hospital is testing@gmail.com.",
|
| 5 |
+
"doc4": "Main services: We provide comprehensive healthcare services including emergency care, diagnostic testing, surgical procedures, maternity services, and specialized treatments.",
|
| 6 |
+
"doc5": "Hospital location: The hospital is located at xyz, abc, 1234, Nepal."
|
| 7 |
+
}
|
rag_engine.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import torch
|
| 3 |
+
from sentence_transformers import SentenceTransformer, util
|
| 4 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 5 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
+
|
| 7 |
+
# --- Model and data loading (runs once, at import time) ---
# Embedding model used to compare queries against documents.
embedding_model = SentenceTransformer("MongoDB/mdbr-leaf-ir")

# Language model and tokenizer, both loaded from the same checkpoint name.
model_name = "PleIAs/Pleias-RAG-350M"
llm_model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the knowledge base from disk.
with open("documents.json", encoding="utf-8") as f:
    docs_data = json.load(f)

# Only the document texts are used for retrieval; the keys are not.
docs_texts = [*docs_data.values()]

# Embed every document up front so a query only needs its own embedding.
docs_embeddings = embedding_model.encode(docs_texts)
|
| 24 |
+
|
| 25 |
+
def recuperar_documentos(consulta, top_k=2, umbral=0.4):
    """Return the stored documents that best match the query.

    The query is embedded and compared by cosine similarity with the
    precomputed document embeddings.

    Args:
        consulta: Query text.
        top_k: Maximum number of documents to return.
        umbral: Minimum similarity a document needs to be included.

    Returns:
        A list of document texts, most similar first, containing only
        documents whose similarity is at least ``umbral`` and never more
        than ``top_k`` entries.
    """
    query_vector = embedding_model.encode([consulta])

    # cosine_similarity yields one row per query; there is a single query.
    scores = cosine_similarity(query_vector, docs_embeddings)[0]

    # Pair each text with its score and rank from best to worst.
    ranked = list(zip(docs_texts, scores))
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    # Keep the texts that clear the threshold, then cap the result at top_k.
    above_threshold = [text for text, score in ranked if score >= umbral]
    selected = []
    for text in above_threshold:
        if len(selected) < top_k:
            selected.append(text)

    return selected
|
| 47 |
+
|
| 48 |
+
def generar_respuesta(consulta, documentos_recuperados):
    """Generate an answer to the question from the retrieved documents.

    Args:
        consulta: The user's question.
        documentos_recuperados: Document texts to inject as context.

    Returns:
        The text the model generated after the prompt, with surrounding
        whitespace removed.
    """
    # Merge the documents into a single context string.
    contexto = " ".join(documentos_recuperados)

    # Prompt layout is fixed; only the context and the question vary.
    prompt = (
        "Answer the question based only on the context provided\n"
        f"Context: {contexto}\n"
        f"Question: {consulta}\n"
        "Answer:"
    )

    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = llm_model.generate(**inputs, max_new_tokens=150)

    # The generated sequence starts with the prompt tokens. Decode only what
    # follows them rather than splitting the full text on "Answer:", which
    # cut the reply short whenever the model itself emitted "Answer:" again.
    prompt_length = inputs["input_ids"].shape[1]
    generated_tokens = outputs[0][prompt_length:]

    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
|
| 64 |
+
|
| 65 |
+
def preguntar(consulta, top_k=2, umbral=0.4):
    """Answer a question end to end: retrieve documents, then generate.

    Args:
        consulta: The user's question.
        top_k: Maximum number of documents to retrieve.
        umbral: Minimum similarity a document needs to be used.

    Returns:
        The generated answer, or a fixed apology message when no document
        is retrieved.
    """
    found = recuperar_documentos(consulta, top_k, umbral)
    if found:
        return generar_respuesta(consulta, found)

    # Nothing relevant was retrieved, so the language model is not called.
    return "I'm sorry, I couldn't find relevant information in the knowledge base."
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch
|
| 2 |
+
transformers
|
| 3 |
+
sentence-transformers
|
| 4 |
+
scikit-learn
|
| 5 |
+
fastapi
|
| 6 |
+
uvicorn
|
| 7 |
+
gradio
|
| 8 |
+
pydantic
|