Spaces:

Nguyen5
/

chatbot1

Sleeping

File size: 6,043 Bytes

3a9ed51
 
 
6548bf5
80c3670
3a9ed51
f2e421a
3a9ed51
c411e11
 
 
 
3a9ed51
3dd5086
3a9ed51
fcc2090
 
c411e11
fcc2090
c411e11
3a9ed51
 
c411e11
3a9ed51
 
c411e11
3a9ed51
 
c411e11
3a9ed51
 
c411e11
3a9ed51
 
c411e11
3a9ed51
 
 
c411e11
 
3a9ed51
c411e11
ed0df67
3a9ed51
 
c411e11
f2e421a
3a9ed51
 
 
 
 
 
 
f2e421a
3a9ed51
b8e573b
3a9ed51
 
 
 
7c86ca3
3a9ed51
 
7c86ca3
3a9ed51
6548bf5
3a9ed51
 
 
 
c411e11
 
3a9ed51
c411e11
3a9ed51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c411e11
 
3a9ed51
c411e11
3a9ed51
80c3670
3a9ed51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80c3670
3a9ed51
 
ed0df67
c411e11
3a9ed51
c411e11
3a9ed51
80c3670
f2e421a
c411e11
3a9ed51
c411e11
3a9ed51
80c3670
3a9ed51
c411e11
3dd5086
6bb0f73
3a9ed51
6bb0f73
3a9ed51
 
 
 
 
 
 
 
 
 
 
533ef4b
3a9ed51
 
 
 
c411e11
3a9ed51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80c3670
3a9ed51
 
 
 
 
 
80c3670
3a9ed51
 
 
 
 
 
 
 
 
 
 
 
80c3670
3a9ed51
 
 
 
c411e11
11e64e1
6548bf5
80c3670

# app.py – Prüfungsrechts-Chatbot (RAG + Sprachmodus)
# Version 26.11 – ohne Modi, stabil für Text + Voice

import gradio as gr
from gradio_pdf import PDF
from huggingface_hub import hf_hub_download

from load_documents import load_documents, DATASET, PDF_FILE, HTML_FILE
from split_documents import split_documents
from vectorstore import build_vectorstore
from retriever import get_retriever
from llm import load_llm
from rag_pipeline import answer, PDF_BASE_URL, LAW_URL

from speech_io import transcribe_audio, synthesize_speech

# =====================================================
# INITIALISIERUNG (global)
# =====================================================

# Module-level start-up pipeline. Everything below runs once at import time;
# the resulting objects are shared by all chat handlers defined further down.

# Step 1: load the raw source documents.
print("🔹 Lade Dokumente ...")
_docs = load_documents()

# Step 2: split the documents into chunks for retrieval.
print("🔹 Splitte Dokumente ...")
_chunks = split_documents(_docs)

# Step 3: build the vector store from the chunks.
print("🔹 Baue VectorStore (FAISS) ...")
_vs = build_vectorstore(_chunks)

# Step 4: wrap the vector store in a retriever (passed to `answer` below).
print("🔹 Erzeuge Retriever ...")
_retriever = get_retriever(_vs)

# Step 5: load the chat model (passed to `answer` as `chat_model`).
print("🔹 Lade LLM ...")
_llm = load_llm()

# Step 6: fetch the viewer files from the Hugging Face dataset repository.
# NOTE(review): `_html_path` is not referenced anywhere else in the visible
# code (the law viewer embeds LAW_URL in an iframe) - confirm it is still needed.
print("🔹 Lade Dateien für Viewer …")
_pdf_path = hf_hub_download(DATASET, PDF_FILE, repo_type="dataset")
_html_path = hf_hub_download(DATASET, HTML_FILE, repo_type="dataset")

# =====================================================
# Quellen formatieren – Markdown für Chat
# =====================================================

def format_sources_markdown(sources):
    """Render the cited sources as a Markdown block for the chat window.

    Args:
        sources: Iterable of dicts, one per cited passage. Each dict must
            provide ``"id"`` and ``"source"``. The keys ``"page"``, ``"url"``
            and ``"snippet"`` are optional and are skipped when missing or
            empty.

    Returns:
        A Markdown string beginning with a line break (so it can be appended
        directly to the answer text), or ``""`` when there are no sources.

    Raises:
        KeyError: If a source dict lacks ``"id"`` or ``"source"``.
    """
    if not sources:
        return ""

    lines = ["", "**📚 Quellen (genutzte Dokumentstellen):**"]
    for s in sources:
        src = s["source"]
        page = s.get("page")
        url = s.get("url")
        snippet = s.get("snippet")

        title = f"Quelle {s['id']} – {src}"
        entry = f"- [{title}]({url})" if url else f"- {title}"

        # Page numbers are only shown for the examination regulations.
        if page and "Prüfungsordnung" in src:
            entry += f", Seite {page}"
        lines.append(entry)

        if snippet:
            # Collapse line breaks and runs of whitespace: a raw newline
            # inside the snippet would end the blockquote (and the list item)
            # after its first line.
            one_line = " ".join(str(snippet).split())
            if one_line:
                lines.append(f"  > {one_line}")

    return "\n".join(lines)

# =====================================================
# TEXT CHATBOT
# =====================================================

def chatbot_text(user_message, history):
    """Answer a typed question and append the exchange to the chat history.

    Args:
        user_message: Text from the input box. ``None``, empty and
            whitespace-only messages are ignored.
        history: Current chat history as a list of ``{"role", "content"}``
            dicts. ``None`` is treated as an empty history.

    Returns:
        A tuple ``(history, "")``: the (possibly extended) history and an
        empty string that clears the input box.
    """
    history = history or []

    # Skip blank input so no retrieval / LLM call is spent on it.
    question = (user_message or "").strip()
    if not question:
        return history, ""

    answer_text, sources = answer(
        question=question,
        retriever=_retriever,
        chat_model=_llm,
    )

    quellen_block = format_sources_markdown(sources)

    # Build a new list instead of mutating the one passed in by the caller.
    history = history + [
        {"role": "user", "content": question},
        {"role": "assistant", "content": answer_text + quellen_block},
    ]

    return history, ""

# =====================================================
# VOICE CHATBOT
# =====================================================

def chatbot_voice(audio_path, history):
    """Answer a spoken question and return the reply as text and audio.

    Args:
        audio_path: Path of the recorded audio file, or ``None``/empty when
            nothing was recorded.
        history: Current chat history as a list of ``{"role", "content"}``
            dicts. ``None`` is treated as an empty history.

    Returns:
        A tuple ``(history, audio, "")``: the (possibly extended) history,
        the synthesized speech for the reply (``None`` when there was
        nothing to answer) and an empty string that clears the text box.
    """
    history = history or []

    # Nothing recorded: leave the chat untouched instead of handing an
    # empty path to the transcriber.
    if not audio_path:
        return history, None, ""

    # 1. Speech -> text
    text = transcribe_audio(audio_path)
    if not text:
        return history, None, ""

    # Store the transcribed question in the chat history.
    history = history + [{"role": "user", "content": text}]

    # 2. Let the RAG pipeline answer the question.
    answer_text, sources = answer(
        question=text,
        retriever=_retriever,
        chat_model=_llm,
    )
    quellen_block = format_sources_markdown(sources)

    bot_msg = answer_text + quellen_block
    history = history + [{"role": "assistant", "content": bot_msg}]

    # 3. Text -> speech. The full message (answer plus sources block) is
    # spoken, matching what read_last_answer replays from the history.
    audio = synthesize_speech(bot_msg)

    return history, audio, ""

# =====================================================
# LAST ANSWER → TTS
# =====================================================

def read_last_answer(history):
    """Synthesize speech for the most recent assistant message.

    Args:
        history: Chat history as a list of ``{"role", "content"}`` dicts;
            may be empty or ``None``.

    Returns:
        The result of ``synthesize_speech`` for the newest assistant
        message, or ``None`` when the history holds no assistant message.
    """
    # Walk the history backwards and stop at the first assistant entry.
    latest_reply = next(
        (
            entry
            for entry in reversed(history or [])
            if entry["role"] == "assistant"
        ),
        None,
    )
    if latest_reply is None:
        return None
    return synthesize_speech(latest_reply["content"])

# =====================================================
# UI – GRADIO
# =====================================================

# Build the Gradio interface: a chat column with text and voice controls
# (scale=2) next to a narrower column with the document viewers (scale=1).
with gr.Blocks(title="Prüfungsrechts-Chatbot (RAG + Sprache)") as demo:
    gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot")
    gr.Markdown(
        "Dieser Chatbot beantwortet Fragen **ausschließlich** aus der "
        "Prüfungsordnung (PDF) und dem Hochschulgesetz NRW (Website). "
        "Du kannst Text eingeben oder direkt ins Mikrofon sprechen."
    )

    with gr.Row():
        with gr.Column(scale=2):
            # NOTE(review): the handlers return messages as
            # {"role", "content"} dicts; whether gr.Chatbot accepts that
            # format without further configuration depends on the installed
            # Gradio version - confirm against the pinned version.
            chatbot = gr.Chatbot(label="Chat", height=500)

            msg = gr.Textbox(
                label="Frage eingeben",
                placeholder="Stelle deine Frage zum Prüfungsrecht …",
            )

            # Send text: pressing Enter in the textbox and clicking the
            # button both call chatbot_text, which updates the chat and
            # clears the textbox.
            msg.submit(
                chatbot_text,
                [msg, chatbot],
                [chatbot, msg]
            )

            send_btn = gr.Button("Senden (Text)")
            send_btn.click(
                chatbot_text,
                [msg, chatbot],
                [chatbot, msg]
            )

            # Voice input: the microphone recording is handed to the handler
            # as a file path.
            gr.Markdown("### 🎙️ Spracheingabe")
            voice_in = gr.Audio(sources=["microphone"], type="filepath")
            # NOTE(review): type="numpy" presumably matches what
            # synthesize_speech returns - confirm against speech_io.
            voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")

            voice_btn = gr.Button("Sprechen & senden")
            voice_btn.click(
                chatbot_voice,
                [voice_in, chatbot],
                [chatbot, voice_out, msg]
            )

            # Replay: synthesize the latest assistant message again.
            read_btn = gr.Button("🔁 Antwort erneut vorlesen")
            read_btn.click(
                read_last_answer,
                [chatbot],
                [voice_out]
            )

            # Reset: replaces the chat with an empty list. Only the chatbot
            # is an output, so voice_out keeps its last audio.
            clear_btn = gr.Button("Chat zurücksetzen")
            clear_btn.click(lambda: [], None, chatbot)

        # =====================
        # Right column: document viewers
        # =====================

        with gr.Column(scale=1):
            gr.Markdown("### 📄 Prüfungsordnung (PDF)")
            PDF(_pdf_path, height=350)

            gr.Markdown("### 📘 Hochschulgesetz NRW (Website)")
            # The law text is embedded from LAW_URL in an iframe.
            gr.HTML(
                f'<iframe src="{LAW_URL}" style="width:100%;height:350px;border:none;"></iframe>'
            )

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()