# Hugging Face Space app (status residue "Spaces: Sleeping" removed from extraction).
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
| import faiss | |
| import pickle | |
| import numpy as np | |
| import os | |
| from sentence_transformers import SentenceTransformer | |
# === CONFIGURATION ===
MODEL_NAME = "openai/gpt-oss-20b"
HF_TOKEN = os.getenv("HF_TOKEN")  # set in the Space's Secrets as HF_TOKEN

if not HF_TOKEN:
    raise RuntimeError("❌ Falta la variable 'HF_TOKEN' en Secrets del Space.")

# === LOAD FAISS INDEX AND DOCUMENTS ===
# Both artifacts must be committed at the Space root; fail fast with a clear
# message instead of letting faiss/pickle raise an opaque error later.
index_path = "nlp_index.faiss"
docs_path = "nlp_docs.pkl"

if not os.path.exists(index_path) or not os.path.exists(docs_path):
    raise FileNotFoundError("❌ Faltan 'nlp_index.faiss' o 'nlp_docs.pkl' en la raíz del Space.")

index = faiss.read_index(index_path)
with open(docs_path, "rb") as f:
    # NOTE(review): pickle.load is only acceptable because this file ships
    # with the Space itself — never load untrusted pickles.
    data = pickle.load(f)

texts = data["texts"]      # document chunks; positions align with the FAISS index rows (see retrieve_context)
sources = data["sources"]  # provenance per chunk — currently unused, presumably kept for future citations

# Query encoder; assumes the index was built with this same model — TODO confirm.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
| # === RECUPERACIÓN SEMÁNTICA === | |
| def retrieve_context(query: str, k: int = 2) -> str: | |
| try: | |
| emb = embedding_model.encode([query], convert_to_numpy=True).astype('float32') | |
| emb = emb / np.linalg.norm(emb) | |
| _, indices = index.search(emb, k) | |
| return "\n\n".join(texts[i] for i in indices[0]) | |
| except Exception: | |
| return "" | |
| # === RESPUESTA CON RAG + STREAMING === | |
# === RAG RESPONSE WITH STREAMING ===
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a RAG-augmented chat completion for the Gradio ChatInterface.

    Yields the accumulated response text after each streamed chunk — Gradio
    replaces the displayed message on every yield, so each value must be the
    full text so far, not a delta.

    Args:
        message: Latest user message.
        history: Prior turns as ``{"role": ..., "content": ...}`` dicts
            (ChatInterface ``type="messages"`` format).
        system_message: System prompt placed first in the message list.
        max_tokens, temperature, top_p: Sampling parameters forwarded to the
            inference endpoint.
    """
    # Prepend retrieved context (if any) with explicit delimiters so the
    # model can tell context from the actual question.
    context = retrieve_context(message)
    if context:
        full_prompt = (
            f"Responde usando únicamente la siguiente información de contexto. Si el contexto no responde la pregunta, usa tu conocimiento general pero sé honesto sobre sus límites.\n\n"
            f"--- CONTEXTO ---\n{context}\n--- FIN DEL CONTEXTO ---\n\n"
            f"Pregunta:\n{message}"
        )
    else:
        full_prompt = message

    client = InferenceClient(token=HF_TOKEN, model=MODEL_NAME, timeout=60)

    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": full_prompt})

    response = ""
    got_content = False
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if chunk.choices and chunk.choices[0].delta.content:
                # Fix: only flag real content. The original set the flag on
                # ANY chunk, so a stream of empty deltas left the user with
                # a blank message instead of the "no response" warning.
                got_content = True
                response += chunk.choices[0].delta.content
            yield response
    except Exception as e:
        # Fix: return after surfacing the error. The original fell through
        # to the "no response" warning, whose yield replaced — and hid —
        # this error message in the UI.
        yield f"⚠️ Error durante la inferencia: {str(e)}"
        return

    if not got_content:
        yield "⚠️ El modelo no generó ninguna respuesta. Intenta con una pregunta más clara o específica."
| # === INTERFAZ EN ESPAÑOL === | |
| chatbot = gr.ChatInterface( | |
| respond, | |
| type="messages", | |
| title="🧠 Experimentos NPL Quoota", | |
| description="Asistente basado en literatura de psicología cognitiva y desarrollo humano. Sistema RAG con más de 3.6 millones de caracteres indexados.", | |
| additional_inputs=[ | |
| gr.Textbox( | |
| value="Eres un asistente experto en desarrollo humano. Responde con claridad, profundidad y empatía, citando conceptos de los libros si es relevante.", | |
| label="Mensaje del sistema" | |
| ), | |
| gr.Slider( | |
| minimum=1, | |
| maximum=2048, | |
| value=2048, | |
| step=1, | |
| label="Máximo de tokens de salida", | |
| info="Número máximo de tokens que el modelo generará en su respuesta. 2048 es el límite del modelo." | |
| ), | |
| gr.Slider( | |
| minimum=0.1, | |
| maximum=4.0, | |
| value=0.7, | |
| step=0.1, | |
| label="Temperatura", | |
| info="Controla la creatividad: valores bajos (ej. 0.2) dan respuestas más enfocadas y predecibles; valores altos (ej. 1.5+) dan respuestas más variadas, sorprendentes o arriesgadas." | |
| ), | |
| gr.Slider( | |
| minimum=0.1, | |
| maximum=1.0, | |
| value=0.95, | |
| step=0.05, | |
| label="Top-p (muestreo nuclear)", | |
| info="Filtra opciones improbables. 0.95 equilibra diversidad y coherencia." | |
| ), | |
| ], | |
| ) | |
| # === LANZAR === | |
| with gr.Blocks() as demo: | |
| chatbot.render() | |
| if __name__ == "__main__": | |
| demo.launch() |