Spaces:

Nguyen5
/

chatbot

Sleeping

App Files Files Community

Nguyen5 committed on Dec 3, 2025

Commit

160e79a

1 Parent(s): dedb74d

commit

Browse files

Files changed (1) hide show

app.py +58 -49

app.py CHANGED Viewed

@@ -10,119 +10,128 @@ from rag_pipeline import rag_answer
 client = OpenAI()
 BUCKET = os.environ["SUPABASE_BUCKET"]
-# --------------------------------------------------------
-# PDF base64 viewer (tránh Chrome block)
-# --------------------------------------------------------
 def encode_pdf_src():
     pdf_bytes = load_file_bytes(BUCKET, "pruefungsordnung.pdf")
     b64 = base64.b64encode(pdf_bytes).decode("utf-8")
     return f"data:application/pdf;base64,{b64}"
-# --------------------------------------------------------
 # HTML viewer
-# --------------------------------------------------------
 def encode_html():
     html_bytes = load_file_bytes(BUCKET, "hochschulgesetz.html")
     return html_bytes.decode("utf-8", errors="ignore")
-# --------------------------------------------------------
-# Speech-to-text
-# --------------------------------------------------------
-def transcribe(audio_path: str) -> str:
     if audio_path is None:
         return ""
     with open(audio_path, "rb") as f:
         result = client.audio.transcriptions.create(
             model="whisper-1",
             file=f,
         )
     return (result.text or "").strip()
-# --------------------------------------------------------
-# Chat logic
-# --------------------------------------------------------
 def chat_fn(text, audio, history):
     text = (text or "").strip()
-    # 1) Ưu tiên TEXT: nếu user đã gõ thì KHÔNG dùng audio nữa
     if text:
         question = text
-        spoken = ""
-    # 2) Nếu không có text nhưng có audio thì mới dùng Whisper
     elif audio is not None:
-        spoken = transcribe(audio)
-        question = spoken
     else:
-        # không có cả text lẫn audio
-        return history, "<p>Bitte Text eingeben oder Mikrofon benutzen.</p>", None
     if not question:
-        return history, "<p>Spracherkennung fehlgeschlagen. Bitte erneut sprechen oder Text tippen.</p>", None
-    # 3) Gọi RAG pipeline (history là list[dict{role, content}])
     answer, docs = rag_answer(question, history or [])
-    # 4) Xây dựng HTML nguồn
     html = "<ol>"
     for i, d in enumerate(docs):
         meta = d.get("metadata", {}) or {}
         src = meta.get("source", "?")
         page = meta.get("page", None)
-        page_info = f" (Seite {page})" if page else ""
         snippet = (d.get("content") or "")[:200]
-        html += f"<li><b>{src}{page_info}</b><br>{snippet}...</li>"
     html += "</ol>"
-    # 5) Lịch sử ở dạng messages (Gradio message-format)
     new_history = (history or []) + [
         {"role": "user", "content": question},
         {"role": "assistant", "content": answer},
     ]
-    # Trả về None cho audio để xóa bản ghi cũ
-    return new_history, html, None
-# --------------------------------------------------------
-# UI
-# --------------------------------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("# ⚖️ Sprachbasierter Chatbot für Prüfungsrecht")
     with gr.Row():
         with gr.Column(scale=3):
-            # Gradio trên HF hiện đang dùng messages-format
-            chatbot = gr.Chatbot(label="Chat")
-            text_input = gr.Textbox(
-                label="Text Eingabe",
-                placeholder="Frage hier eintippen ..."
-            )
-            audio_input = gr.Audio(
-                type="filepath",
-                label="Spracheingabe (Mikrofon)"
-            )
             send_btn = gr.Button("Senden")
         with gr.Column(scale=2):
             gr.Markdown("### 📄 Prüfungsordnung PDF")
             gr.HTML(
-                f"<iframe src='{encode_pdf_src()}' "
-                "width='100%' height='250'></iframe>"
             )
             gr.Markdown("### 📜 Hochschulgesetz NRW")
             gr.HTML(
-                "<div style='overflow:auto;height:250px;"
-                "border:1px solid #ccc;padding:10px;'>"
-                f"{encode_html()}</div>"
             )
             sources_html = gr.HTML()
-    # Lưu ý: thêm audio_input vào outputs để có thể reset về None
     send_btn.click(
         chat_fn,
         inputs=[text_input, audio_input, chatbot],

 client = OpenAI()
 BUCKET = os.environ["SUPABASE_BUCKET"]
# ------------------------------------------
# Public URLs used to open the PDF/HTML when a "Quelle" link is clicked
# ------------------------------------------
# Strip a trailing slash from SUPABASE_URL so the joined path never contains
# "//storage/...", and build the shared prefix only once.
_PUBLIC_BASE = f"{os.environ['SUPABASE_URL'].rstrip('/')}/storage/v1/object/public/{BUCKET}"
PDF_URL = f"{_PUBLIC_BASE}/pruefungsordnung.pdf"
HG_URL = f"{_PUBLIC_BASE}/hochschulgesetz.html"
# ------------------------------------------
# PDF viewer source (base64 data URI)
# ------------------------------------------
def encode_pdf_src():
    """Return ``pruefungsordnung.pdf`` from the bucket as a base64 ``data:`` URI."""
    encoded = base64.b64encode(load_file_bytes(BUCKET, "pruefungsordnung.pdf"))
    return "data:application/pdf;base64," + encoded.decode("utf-8")
# ------------------------------------------
# HTML viewer
# ------------------------------------------
def encode_html():
    """Return ``hochschulgesetz.html`` from the bucket decoded as UTF-8 text.

    Bytes that are not valid UTF-8 are dropped instead of raising.
    """
    raw = load_file_bytes(BUCKET, "hochschulgesetz.html")
    markup = raw.decode("utf-8", errors="ignore")
    return markup
# ------------------------------------------
# Speech-to-text
# ------------------------------------------
def transcribe(audio_path) -> str:
    """Transcribe a recorded audio file with the ``whisper-1`` model.

    Args:
        audio_path: Filesystem path of the recording, or ``None`` / ``""``
            when nothing was recorded.

    Returns:
        The stripped transcript, or ``""`` when there is no recording or the
        API returned no text.
    """
    # An empty path means "no recording" just like None; passing it on to
    # open() would only raise FileNotFoundError.
    if not audio_path:
        return ""
    with open(audio_path, "rb") as f:
        result = client.audio.transcriptions.create(
            model="whisper-1",
            file=f,
            language="de",  # force German
            temperature=0.0,  # keep the result stable
        )
    return (result.text or "").strip()
# ------------------------------------------
# MAIN CHAT FUNCTION
# ------------------------------------------
def _render_sources(docs):
    """Render the retrieved documents as an HTML ordered list of clickable sources."""
    # Function-scope import so this block does not depend on the file's import list.
    from html import escape

    items = []
    for number, doc in enumerate(docs or [], start=1):
        meta = doc.get("metadata", {}) or {}
        # A stored source of None would make the substring test below raise.
        src = str(meta.get("source") or "?")
        link = PDF_URL if "Prüfungsordnung" in src else HG_URL
        page = meta.get("page")
        # Compare explicitly so that a page number of 0 is still displayed.
        page_info = f"(Seite {page})" if page not in (None, "") else ""
        snippet = (doc.get("content") or "")[:200]
        # Source name and snippet are retrieved text: escape them so stray or
        # truncated markup cannot break (or inject into) the list.
        items.append(
            "<li>"
            f'<a href="{escape(link)}" target="_blank">'
            f"<b>Quelle {number}: {escape(src)} {escape(page_info)}</b>"
            "</a><br>"
            f"{escape(snippet)}..."
            "</li>"
        )
    return "<ol>" + "".join(items) + "</ol>"


def chat_fn(text, audio, history):
    """Answer one chat turn from typed text or a microphone recording.

    Args:
        text: Typed question; takes priority over ``audio`` when non-empty.
        audio: Path of a recording passed to ``transcribe``, or ``None``.
        history: Previous messages as ``{"role", "content"}`` dicts, or ``None``.

    Returns:
        A 3-tuple ``(history, sources_html, audio_value)``. On the early-exit
        paths the history is returned unchanged with a hint message and
        ``None``; otherwise the history is extended by the new user/assistant
        pair and the last element is a ``gr.update(value=None)`` meant to
        reset the audio input.
    """
    text = (text or "").strip()

    # 1) Prefer typed text; audio is only transcribed when no text was given.
    if text:
        question = text
    elif audio is not None:
        question = transcribe(audio)
    else:
        return history, "<p>Bitte Text oder Mikrofon benutzen.</p>", None

    if not question:
        return history, "<p>Spracherkennung fehlgeschlagen.</p>", None

    # 2) RAG
    answer, docs = rag_answer(question, history or [])

    # 3) Gradio message history
    new_history = (history or []) + [
        {"role": "user", "content": question},
        {"role": "assistant", "content": answer},
    ]

    # 4) Clickable sources + reset of the audio input
    return new_history, _render_sources(docs), gr.update(value=None)
+# ------------------------------------------
+# UI LAYOUT
+# ------------------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("# ⚖️ Sprachbasierter Chatbot für Prüfungsrecht")
     with gr.Row():
         with gr.Column(scale=3):
+            chatbot = gr.Chatbot(label="Chat (RAG)")
+            text_input = gr.Textbox(label="Text Eingabe")
+            audio_input = gr.Audio(type="filepath", label="Spracheingabe (Mikrofon)")
             send_btn = gr.Button("Senden")
         with gr.Column(scale=2):
             gr.Markdown("### 📄 Prüfungsordnung PDF")
             gr.HTML(
+                f"<iframe src='{encode_pdf_src()}' width='100%' height='250'></iframe>"
             )
             gr.Markdown("### 📜 Hochschulgesetz NRW")
             gr.HTML(
+                f"<div style='overflow:auto;height:250px;'>{encode_html()}</div>"
             )
             sources_html = gr.HTML()
     send_btn.click(
         chat_fn,
         inputs=[text_input, audio_input, chatbot],