Antoni341 committed on
Commit
01fe139
·
verified ·
1 Parent(s): b93a0e4

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +187 -0
  2. requirements.txt +219 -0
app.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from langchain_openai import ChatOpenAI
4
+ from langchain_community.vectorstores import Qdrant
5
+ from langchain_community.embeddings import HuggingFaceEmbeddings
6
+ from qdrant_client import QdrantClient, models
7
+ from qdrant_client.http import models as rest_models
8
+ from langchain.chains import create_history_aware_retriever, create_retrieval_chain
9
+ from langchain.chains.combine_documents import create_stuff_documents_chain
10
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
11
+ from langchain_community.chat_message_histories import ChatMessageHistory
12
+ from langchain_core.runnables.history import RunnableWithMessageHistory
13
+ from dotenv import load_dotenv
14
+ load_dotenv()
15
+
16
+
17
# --- 1. CONFIGURATION ---
# Connection credentials are read from the environment (.env loaded above).
# NOTE(review): values may be None if the variables are unset — verify the
# deployment environment defines all three.
QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Qdrant collection holding the pre-ingested document embeddings.
COLLECTION_NAME = "dgt_documents_qdrant_memory_filter_fixed_2"

# Category choices offered in the UI dropdown; "Todas" disables filtering.
OPCIONES_CATEGORIAS = [
    "Todas",
    "Documentos de la SUMA",
    "Manuales Técnicos y Procedimientos",
    "Inventarios y Activos SUMA",
    "Otros",
]
30
+
31
# --- 2. INITIALIZE CLIENTS ---

# Qdrant client; used read-only against the existing collection.
client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)

# Embedding model — must be the SAME model used when the documents were
# ingested, otherwise query vectors will not match the stored vectors.
# device='cpu' keeps this runnable on the free Hugging Face Spaces tier.
embeddings_model = HuggingFaceEmbeddings(
    model_name="intfloat/e5-large-v2",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": False},
)

# Chat LLM, shared by the question-rewriting and answering chains.
llm_openai = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.1,
    api_key=OPENAI_API_KEY,
)

# LangChain wrapper over the vector collection (read-only usage).
# The document text lives under the "content" payload key.
vectordb = Qdrant(
    client=client,
    collection_name=COLLECTION_NAME,
    embeddings=embeddings_model,
    content_payload_key="content",
)
58
+
59
# --- 3. PROMPTS AND CHAINS ---

# Rewrites a follow-up question into a standalone one using the chat history,
# so the retriever receives a self-contained query.
contextualize_q_system_prompt = """Dado un historial de chat y la última pregunta del usuario \
que podría hacer referencia al contexto en el historial de chat, formula una pregunta independiente \
que pueda entenderse sin el historial de chat. NO respondas a la pregunta, \
solo reformúlala si es necesario y, si no, devuélvela tal cual."""

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Answering prompt: grounded in the retrieved context, asks the model to cite
# source documents and to say "I don't know" when the context is insufficient.
qa_system_prompt = """Eres un asistente especializado en los documentos sobre la Sociedad Musical de Alberic (SUMA). \
Utiliza los siguientes fragmentos de contexto recuperado para responder a la pregunta. \
Si no sabes la respuesta, di que no lo sabes. \
Menciona siempre de qué documentos has extraído la información (usando el metadato 'source' si es posible). \
Profundiza en la respuesta.

Contexto:
{context}"""

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
90
+
91
# --- 4. MEMORY MANAGEMENT ---
# In-process conversation store: session_id -> ChatMessageHistory.
# NOTE(review): lives only as long as the process; a restart wipes all memory.
store = {}

def get_session_history(session_id: str):
    """Return the chat history for *session_id*, creating it on first use."""
    history = store.get(session_id)
    if history is None:
        history = ChatMessageHistory()
        store[session_id] = history
    return history
98
+
99
# --- 5. CHAT LOGIC WITH FILTERS ---

def build_qdrant_filter(category_name):
    """Translate the UI category selection into a Qdrant payload filter.

    Returns None (no filtering) when the selection is empty or "Todas";
    otherwise an exact-match filter on the "category" payload field.
    """
    if not category_name or category_name == "Todas":
        return None
    category_condition = rest_models.FieldCondition(
        key="category",
        match=rest_models.MatchValue(value=category_name),
    )
    return rest_models.Filter(must=[category_condition])
112
+
113
def chat_logic(message, history, selected_category):
    """Answer *message* with RAG over the (optionally filtered) collection.

    Parameters:
        message: the user's latest question.
        history: Gradio's chat history (unused here — memory is kept in
            ``store`` via RunnableWithMessageHistory).
        selected_category: dropdown value, turned into a Qdrant filter.

    Yields the progressively accumulated answer string (Gradio streaming).
    """
    # NOTE(review): a single fixed session id means every visitor shares the
    # same conversation memory — acceptable for a demo, wrong for multi-user.
    session_id = "usuario_web"

    # The retriever is rebuilt on every call so the category filter can
    # change between messages.
    qdrant_filter = build_qdrant_filter(selected_category)
    dynamic_retriever = vectordb.as_retriever(
        search_kwargs={"k": 4, "filter": qdrant_filter}
    )

    # Pipeline: history-aware retrieval -> stuff documents -> answer.
    history_aware_retriever = create_history_aware_retriever(
        llm_openai, dynamic_retriever, contextualize_q_prompt
    )
    question_answer_chain = create_stuff_documents_chain(llm_openai, qa_prompt)
    rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

    conversational_rag_chain = RunnableWithMessageHistory(
        rag_chain,
        get_session_history,
        input_messages_key="input",
        history_messages_key="chat_history",
        output_messages_key="answer",
    )

    # Stream the response: accumulate "answer" chunks, yielding the running
    # text each time so the UI updates incrementally.
    full_response = ""
    stream = conversational_rag_chain.stream(
        {"input": message},
        config={"configurable": {"session_id": session_id}},
    )
    for chunk in stream:
        if "answer" in chunk:
            full_response += chunk["answer"]
            yield full_response
152
+
153
# --- 6. GRADIO INTERFACE ---

# Custom CSS: hide the Gradio footer and soften the page background.
custom_css = """
footer {visibility: hidden}
.gradio-container {background-color: #f9fafb}
"""

tema_musical = gr.themes.Soft(primary_hue="indigo", secondary_hue="slate")

with gr.Blocks(theme=tema_musical, css=custom_css, title="Chatbot SUMA") as demo:
    gr.Markdown("# 🎵 Asistente Virtual SUMA")
    gr.Markdown("Pregunta sobre normativas, manuales y documentos internos.")

    # Category filter; passed to chat_logic as an additional input.
    filtro_dropdown = gr.Dropdown(
        choices=OPCIONES_CATEGORIAS,
        value="Todas",
        label="📂 Filtrar por Categoría",
        info="Acota la búsqueda a un tipo de documento específico.",
    )

    chat_interface = gr.ChatInterface(
        fn=chat_logic,
        additional_inputs=[filtro_dropdown],
        examples=[
            ["¿Cuáles son los requisitos para ser socio?"],
            ["Resumen del manual de procedimientos"],
        ],
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==1.10.1
2
+ aiofiles==24.1.0
3
+ aiohappyeyeballs==2.6.1
4
+ aiohttp==3.12.15
5
+ aiosignal==1.4.0
6
+ annotated-types==0.7.0
7
+ anyio==4.10.0
8
+ asttokens
9
+ attrs==25.3.0
10
+ backoff==2.2.1
11
+ beautifulsoup4==4.13.5
12
+ Brotli==1.1.0
13
+ certifi==2025.8.3
14
+ cffi==2.0.0
15
+ charset-normalizer==3.4.3
16
+ click==8.3.0
17
+ coloredlogs==15.0.1
18
+ comm
19
+ contourpy==1.3.3
20
+ cryptography==46.0.1
21
+ cycler==0.12.1
22
+ dataclasses-json==0.6.7
23
+ debugpy
24
+ decorator
25
+ distro==1.9.0
26
+ emoji==2.15.0
27
+ exceptiongroup
28
+ executing
29
+ fastapi==0.117.1
30
+ fastembed==0.7.3
31
+ ffmpy==0.6.1
32
+ filelock==3.19.1
33
+ filetype==1.2.0
34
+ flatbuffers==25.9.23
35
+ fonttools==4.60.0
36
+ frozenlist==1.7.0
37
+ fsspec==2025.9.0
38
+ gradio==5.46.1
39
+ gradio_client==1.13.1
40
+ greenlet==3.2.4
41
+ groovy==0.1.2
42
+ grpcio==1.75.0
43
+ h11==0.16.0
44
+ h2==4.3.0
45
+ hf-xet==1.1.10
46
+ hpack==4.1.0
47
+ html5lib==1.1
48
+ httpcore==1.0.9
49
+ httpx==0.28.1
50
+ httpx-sse==0.4.1
51
+ huggingface-hub==0.35.0
52
+ humanfriendly==10.0
53
+ hyperframe==6.1.0
54
+ idna==3.10
55
+ importlib_metadata
56
+ ipykernel
57
+ ipython
58
+ ipython_pygments_lexers
59
+ ipywidgets==8.1.7
60
+ jedi
61
+ Jinja2==3.1.6
62
+ jiter==0.11.0
63
+ joblib==1.5.2
64
+ jpype1==1.6.0
65
+ jsonpatch==1.33
66
+ jsonpointer==3.0.0
67
+ jupyter_client
68
+ jupyter_core
69
+ jupyterlab_widgets==3.0.15
70
+ kiwisolver==1.4.9
71
+ langchain==0.3.27
72
+ langchain-community==0.3.29
73
+ langchain-core==0.3.76
74
+ langchain-experimental==0.3.4
75
+ langchain-huggingface==0.3.1
76
+ langchain-ollama==0.3.8
77
+ langchain-openai==0.3.33
78
+ langchain-qdrant==0.2.1
79
+ langchain-text-splitters==0.3.11
80
+ langdetect==1.0.9
81
+ langsmith==0.4.28
82
+ loguru==0.7.3
83
+ lxml==6.0.2
84
+ Markdown==3.9
85
+ markdown-it-py==4.0.0
86
+ MarkupSafe==3.0.2
87
+ marshmallow==3.26.1
88
+ matplotlib==3.10.6
89
+ matplotlib-inline
90
+ mdurl==0.1.2
91
+ ml_dtypes==0.5.3
92
+ mmh3==5.2.0
93
+ mpmath==1.3.0
94
+ multidict==6.6.4
95
+ mypy_extensions==1.1.0
96
+ nest_asyncio
97
+ networkx==3.5
98
+ nltk==3.9.2
99
+ numpy==2.2.6
100
+ nvidia-cublas-cu12==12.8.4.1
101
+ nvidia-cuda-cupti-cu12==12.8.90
102
+ nvidia-cuda-nvrtc-cu12==12.8.93
103
+ nvidia-cuda-runtime-cu12==12.8.90
104
+ nvidia-cudnn-cu12==9.10.2.21
105
+ nvidia-cufft-cu12==11.3.3.83
106
+ nvidia-cufile-cu12==1.13.1.3
107
+ nvidia-curand-cu12==10.3.9.90
108
+ nvidia-cusolver-cu12==11.7.3.90
109
+ nvidia-cusparse-cu12==12.5.8.93
110
+ nvidia-cusparselt-cu12==0.7.1
111
+ nvidia-nccl-cu12==2.27.3
112
+ nvidia-nvjitlink-cu12==12.8.93
113
+ nvidia-nvtx-cu12==12.8.90
114
+ olefile==0.47
115
+ ollama==0.5.4
116
+ onnx==1.19.0
117
+ onnxruntime==1.22.1
118
+ openai==1.108.0
119
+ opencv-python==4.12.0.88
120
+ orjson==3.11.3
121
+ packaging
122
+ pandas==2.3.2
123
+ parso
124
+ pdf2image==1.17.0
125
+ pdfminer==20191125
126
+ pdfminer.six==20250506
127
+ pexpect
128
+ pi_heif==1.1.0
129
+ pickleshare
130
+ pillow==11.3.0
131
+ platformdirs
132
+ portalocker==3.2.0
133
+ prompt_toolkit
134
+ propcache==0.3.2
135
+ protobuf==6.32.1
136
+ psutil
137
+ ptyprocess
138
+ pure_eval
139
+ py_rust_stemmers==0.1.5
140
+ pycparser==2.23
141
+ pycryptodome==3.23.0
142
+ pydantic==2.11.9
143
+ pydantic-settings==2.10.1
144
+ pydantic_core==2.33.2
145
+ pydub==0.25.1
146
+ Pygments
147
+ PyMuPDF==1.26.4
148
+ pyparsing==3.2.5
149
+ pypdf==6.0.0
150
+ pypdfium2==4.30.0
151
+ pytesseract==0.3.13
152
+ python-dateutil
153
+ python-dotenv==1.1.1
154
+ python-iso639==2025.2.18
155
+ python-magic==0.4.27
156
+ python-multipart==0.0.20
157
+ python-oxmsg==0.0.2
158
+ pytz==2025.2
159
+ PyYAML==6.0.2
160
+ pyzmq
161
+ qdrant-client==1.15.1
162
+ rake-nltk==1.0.6
163
+ rank-bm25==0.2.2
164
+ RapidFuzz==3.14.1
165
+ regex==2025.9.1
166
+ requests==2.32.5
167
+ requests-toolbelt==1.0.0
168
+ rich==14.1.0
169
+ ruff==0.13.1
170
+ safehttpx==0.1.6
171
+ safetensors==0.6.2
172
+ scikit-learn==1.7.2
173
+ scipy==1.16.2
174
+ semantic-version==2.10.0
175
+ sentence-transformers==5.1.0
176
+ setuptools==78.1.1
177
+ shellingham==1.5.4
178
+ six
179
+ sniffio==1.3.1
180
+ soupsieve==2.8
181
+ SQLAlchemy==2.0.43
182
+ stack_data
183
+ starlette==0.48.0
184
+ sympy==1.14.0
185
+ tabula-py==2.10.0
186
+ tabulate==0.9.0
187
+ tenacity==9.1.2
188
+ threadpoolctl==3.6.0
189
+ tiktoken==0.11.0
190
+ timm==1.0.20
191
+ tokenizers==0.22.0
192
+ tomlkit==0.13.3
193
+ torch==2.8.0
194
+ torchvision==0.23.0
195
+ tornado
196
+ tqdm==4.67.1
197
+ traitlets
198
+ transformers==4.56.1
199
+ triton==3.4.0
200
+ typer==0.19.1
201
+ typing-inspect==0.9.0
202
+ typing-inspection==0.4.1
203
+ typing_extensions
204
+ tzdata==2025.2
205
+ unstructured==0.18.15
206
+ unstructured-client==0.42.3
207
+ unstructured-inference==1.0.5
208
+ unstructured.pytesseract==0.3.15
209
+ urllib3==2.5.0
210
+ uvicorn==0.36.0
211
+ wcwidth
212
+ webencodings==0.5.1
213
+ websockets==15.0.1
214
+ wheel==0.45.1
215
+ widgetsnbextension==4.0.14
216
+ wrapt==1.17.3
217
+ yarl==1.20.1
218
+ zipp
219
+ zstandard==0.25.0