Commit · 1e88950
1 Parent(s): 3aa1632
fix
- app.py +85 -52
- test/wikitop10.txt +0 -0
- test/wikitop100.txt +0 -0
app.py
CHANGED
@@ -1,17 +1,17 @@
-# app.py - v2.
-# Description:
-#
-#
+# app.py - v2.1 (Debug Edition)
+# Description: Combines the functional stability of v2.0 with the extensive
+# debugging and assertions of earlier versions. This version is ideal
+# for development, troubleshooting, and understanding the internal workings.
 
 import os
 import torch
 import gradio as gr
+import time
 
 from typing import List, Tuple, Generator, Dict
 from threading import Thread
 
 # ML / Transformers
-# THE FIX HERE: TextIteratorStreamer instead of TextStreamer
 from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
 
 # Document processing & RAG
@@ -32,6 +32,10 @@ LLM_MODEL: Gemma3ForConditionalGeneration = None
 LLM_PROCESSOR: AutoProcessor = None
 VECTOR_STORE: FAISS = None
 
+def print_debug(message: str):
+    """Consistent debug output with a timestamp."""
+    print(f"[DEBUG {time.strftime('%H:%M:%S')}] {message}")
+
 # --------------------------------------------------------------------
 # Model Loading
 # --------------------------------------------------------------------
@@ -42,16 +46,20 @@ def get_device() -> torch.device:
 def get_embedding_function() -> HuggingFaceEmbeddings:
     global EMBEDDING_FUNCTION
     if EMBEDDING_FUNCTION is None:
+        device = get_device()
+        print_debug(f"Initializing embedding model '{EMBED_MODEL_ID}' on device '{device}'.")
         EMBEDDING_FUNCTION = HuggingFaceEmbeddings(
             model_name=EMBED_MODEL_ID,
-            model_kwargs={'device':
+            model_kwargs={'device': device}
         )
+        print_debug("Embedding model initialized successfully.")
     return EMBEDDING_FUNCTION
 
 def get_llm() -> Tuple[Gemma3ForConditionalGeneration, AutoProcessor]:
     global LLM_MODEL, LLM_PROCESSOR
     if LLM_MODEL is None or LLM_PROCESSOR is None:
         device = get_device()
+        print_debug(f"Initializing LLM '{LLM_MODEL_ID}' on device '{device}'.")
         dtype = torch.bfloat16 if "cuda" in device.type else torch.float32
         LLM_MODEL = Gemma3ForConditionalGeneration.from_pretrained(
             LLM_MODEL_ID,
@@ -59,6 +67,7 @@ def get_llm() -> Tuple[Gemma3ForConditionalGeneration, AutoProcessor]:
             device_map="auto",
         ).eval()
         LLM_PROCESSOR = AutoProcessor.from_pretrained(LLM_MODEL_ID)
+        print_debug("LLM and processor initialized successfully.")
     return LLM_MODEL, LLM_PROCESSOR
 
 # --------------------------------------------------------------------
@@ -90,12 +99,13 @@ def get_text_splitter() -> RecursiveCharacterTextSplitter:
 # --------------------------------------------------------------------
 def index_files(file_paths: List[str], progress=gr.Progress(track_tqdm=True)) -> str:
     global VECTOR_STORE
-    if not file_paths: return "
+    if not file_paths: return "No files selected for indexing."
+    print_debug(f"Indexing started for {len(file_paths)} file(s).")
 
     embedding_function = get_embedding_function()
     text_splitter = get_text_splitter()
     documents: List[Document] = []
-    for path in progress.tqdm(file_paths, desc="1/2:
+    for path in progress.tqdm(file_paths, desc="1/2: Processing & chunking files"):
         if path is None: continue
         text = extract_text_from_file(path)
         if not text.strip(): continue
@@ -105,133 +115,156 @@ def index_files(file_paths: List[str], progress=gr.Progress(track_tqdm=True)) -> str:
             doc = Document(page_content=chunk, metadata={"source": source_name})
             documents.append(doc)
 
-
+    assert all(isinstance(d, Document) for d in documents), "All elements in 'documents' must be of type langchain.Document."
+    print_debug(f"Successfully created {len(documents)} chunks from the files.")
+    if not documents: return "No text found in the files that could be indexed."
 
-    progress(0.5, desc="2/2:
+    progress(0.5, desc="2/2: Creating embeddings & building the FAISS index...")
     new_store = FAISS.from_documents(documents, embedding_function)
+    print_debug("FAISS index successfully built from the documents.")
 
     if VECTOR_STORE is None: VECTOR_STORE = new_store
     else: VECTOR_STORE.add_documents(documents)
 
+    assert VECTOR_STORE is not None and VECTOR_STORE.index.ntotal > 0, "VECTOR_STORE was not initialized correctly."
     final_count = VECTOR_STORE.index.ntotal
-
+    print_debug(f"Indexing complete. Total number of chunks in the index: {final_count}")
+    return f"Index updated: {final_count} chunks in total."
 
 def clear_index() -> str:
     global VECTOR_STORE
     VECTOR_STORE = None
     import gc; gc.collect()
-
+    print_debug("Vector index has been cleared.")
+    return "Index cleared."
 
 def retrieve_relevant_chunks(query: str, top_k: int = 5) -> List[Dict]:
-    if VECTOR_STORE is None:
+    if VECTOR_STORE is None:
+        print_debug("Retrieval attempted, but the vector index is empty.")
+        return []
+
+    print_debug(f"Searching for {top_k} relevant chunks for the query: '{query}'")
     results_with_scores = VECTOR_STORE.similarity_search_with_score(query, k=top_k)
-
+
+    formatted_results = [{
         "content": doc.page_content,
-        "source": doc.metadata.get("source", "
+        "source": doc.metadata.get("source", "Unknown"),
         "score": 1 - score
     } for doc, score in results_with_scores]
 
+    assert isinstance(formatted_results, list), "Retrieval result must be a list."
+    if formatted_results:
+        assert all("content" in r and "source" in r and "score" in r for r in formatted_results), "Every retrieval result must contain 'content', 'source', and 'score'."
+
+    print_debug(f"Found {len(formatted_results)} chunks.")
+    return formatted_results
+
 # --------------------------------------------------------------------
 # LLM generation with streaming
 # --------------------------------------------------------------------
 def build_rag_prompt(user_question: str, retrieved_chunks: List[Dict]) -> str:
-    # ... (This function remains unchanged)
     if not retrieved_chunks:
-        context_str = "
+        context_str = "No relevant documents were found in the context."
     else:
         context_parts = []
         for i, ch in enumerate(retrieved_chunks, start=1):
-            context_parts.append(
-                f"Document [{i}] (Source: {ch['source']}, Relevance: {ch['score']:.3f}):\n\"{ch['content']}\""
-            )
+            context_parts.append(f"Document [{i}] (Source: {ch['source']}, Relevance: {ch['score']:.3f}):\n\"{ch['content']}\"")
        context_str = "\n\n".join(context_parts)
-    prompt = (f"
-             f"
-             f"
-             f"
-             f"---
-             f"---
-             f"---
+    prompt = (f"You are a precise, helpful assistant. Your task is to answer the following user question exclusively "
+              f"based on the context documents below. "
+              f"If the answer is not contained in the documents, state clearly: 'The information is not contained in the provided documents.' "
+              f"Answer in German and summarize the relevant information instead of quoting the documents verbatim.\n\n"
+              f"--- Context documents ---\n{context_str}\n\n"
+              f"--- User question ---\n{user_question}\n\n"
+              f"--- Your answer ---\n")
     return prompt
 
 def answer_with_rag(question: str, history: list) -> Generator[str, None, None]:
+    print_debug("Starting RAG answer generation.")
     model, processor = get_llm()
-    # THE FIX HERE: use TextIteratorStreamer
     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
 
     retrieved = retrieve_relevant_chunks(question, top_k=5)
     prompt = build_rag_prompt(question, retrieved)
+    print_debug(f"Generated RAG prompt (first 200 characters): '{prompt[:200]}'")
+
     messages = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
 
+    print_debug(f"Message structure being prepared for the processor: {str(messages)}")
+    assert isinstance(messages, list) and len(messages) > 0, "messages must be a non-empty list."
+    assert isinstance(messages[0], dict) and "role" in messages[0] and "content" in messages[0], "A message must be a dictionary with 'role' and 'content'."
+    assert isinstance(messages[0]["content"], list) and len(messages[0]["content"]) > 0, "content must be a non-empty list."
+    assert isinstance(messages[0]["content"][0], dict) and "type" in messages[0]["content"][0] and "text" in messages[0]["content"][0], "A content block must be a dictionary with 'type' and 'text'."
+    print_debug("ASSERTIONS for the message structure passed successfully.")
+
     input_ids = processor.apply_chat_template(
         messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
     ).to(model.device)
 
+    assert isinstance(input_ids, torch.Tensor), "The processor should return a torch.Tensor."
+    print_debug(f"Processor created 'input_ids' with shape {input_ids.shape}.")
+
     generation_kwargs = {
-        "input_ids": input_ids,
-        "
-        "max_new_tokens": 1024,
-        "do_sample": True,
-        "temperature": 0.7,
-        "top_p": 0.9,
+        "input_ids": input_ids, "streamer": streamer, "max_new_tokens": 1024,
+        "do_sample": True, "temperature": 0.7, "top_p": 0.9,
     }
 
-    # Generation has to run in a separate thread so that we can iterate
-    # over the streamer in the main thread.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
+    print_debug("LLM generation thread started.")
 
-    # Now we can iterate over the streamer and pass the tokens on to the UI.
     for new_text in streamer:
         yield new_text
+    print_debug("LLM generation finished.")
 
 # --------------------------------------------------------------------
 # Gradio UI
 # --------------------------------------------------------------------
 def build_demo() -> gr.Blocks:
-    with gr.Blocks(title="Gemma 3 RAG v2.
+    with gr.Blocks(title="Gemma 3 RAG v2.1", theme="soft") as demo:
         gr.Markdown(
             """
-            # 🔍 Gemma 3 RAG v2.
-            **
-
-            2. Ask your questions in the chat window. The answers will be streamed live.
+            # 🔍 Gemma 3 RAG v2.1 – Debug Edition
+            **A "state of the art" RAG pipeline with `google/embeddinggemma-300m` and `google/gemma-3-4b-it`**
+            This version prints extensive debug output to the console.
             """
         )
         with gr.Row():
             with gr.Column(scale=1):
-                gr.Markdown("### 📁
-                file_uploader = gr.File(label="
+                gr.Markdown("### 📁 Document management")
+                file_uploader = gr.File(label="Upload files (.pdf, .txt, .md)", file_count="multiple", type="filepath")
                 with gr.Row():
-                    index_button = gr.Button("🔄
-                    clear_index_button = gr.Button("🧹
-                index_status = gr.Markdown("Index
+                    index_button = gr.Button("🔄 Update index", variant="primary")
+                    clear_index_button = gr.Button("🧹 Clear index")
+                index_status = gr.Markdown("Index is empty.")
                 index_button.click(fn=index_files, inputs=file_uploader, outputs=index_status)
                 clear_index_button.click(fn=clear_index, inputs=None, outputs=index_status)
             with gr.Column(scale=2):
-                gr.Markdown("### 💬 Chat
+                gr.Markdown("### 💬 Chat about your documents")
                 chatbot = gr.Chatbot(label="Gemma-3 Chat", type="messages", show_copy_button=True, height=600, render_markdown=True)
                 with gr.Row():
-                    msg_textbox = gr.Textbox(label="
-                    send_btn = gr.Button("
+                    msg_textbox = gr.Textbox(label="Your question", placeholder="Ask a question about the documents...", scale=4, autofocus=True)
+                    send_btn = gr.Button("Send", variant="primary", scale=1)
 
         def chat_interface(message: str, history: list):
             if not message or not message.strip(): return history
+            print_debug(f"New user message received: '{message}'")
             history.append({"role": "user", "content": message})
             history.append({"role": "assistant", "content": ""})
             for token in answer_with_rag(message, history):
                 history[-1]["content"] += token
                 yield history
+            print_debug("Streaming to the UI finished.")
 
         msg_textbox.submit(fn=chat_interface, inputs=[msg_textbox, chatbot], outputs=chatbot).then(fn=lambda: gr.update(value=""), outputs=msg_textbox)
         send_btn.click(fn=chat_interface, inputs=[msg_textbox, chatbot], outputs=chatbot).then(fn=lambda: gr.update(value=""), outputs=msg_textbox)
     return demo
 
 if __name__ == "__main__":
-    print("
+    print("Starting application... initializing models.")
     get_embedding_function()
     get_llm()
 
     app_demo = build_demo()
-    print("
+    print("Models loaded. Launching the Gradio interface.")
     app_demo.launch()
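
The comments removed in this diff spell out the core of the fix: model.generate() blocks until it finishes, so it has to run in a background thread while the main thread drains the TextIteratorStreamer. Below is a minimal, self-contained sketch of that pattern; the model name (gpt2) and the prompt are placeholders rather than the Gemma 3 checkpoint and RAG prompt this Space actually uses.

from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Placeholder model; the Space itself loads a Gemma 3 checkpoint via Gemma3ForConditionalGeneration.
model_id = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

inputs = tokenizer("Streaming test:", return_tensors="pt")
# skip_prompt=True keeps the echoed prompt out of the streamed output.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": 32, "do_sample": False}

# generate() blocks until it is done, so it runs in its own thread;
# otherwise the loop below could never start draining the streamer.
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

for new_text in streamer:  # yields decoded text chunks as they are produced
    print(new_text, end="", flush=True)
thread.join()

This is the same Thread-plus-TextIteratorStreamer arrangement that answer_with_rag() uses to yield tokens to the Gradio chatbot.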
test/wikitop10.txt
ADDED
The diff for this file is too large to render. See raw diff.
test/wikitop100.txt
ADDED
The diff for this file is too large to render. See raw diff.
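
The two added text files are not rendered here, but they read like retrieval test fixtures. The following is a hedged sketch of how one of them could be smoke-tested with the same chunk/embed/FAISS/similarity_search_with_score flow (including the 1 - score conversion) that app.py uses; the embedding model, splitter settings, import paths, and query are assumptions, since the corresponding parts of app.py are outside this diff.

from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Assumed stand-ins: the Space's EMBED_MODEL_ID and text-splitter settings are not part of this diff.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

with open("test/wikitop10.txt", encoding="utf-8") as fh:
    chunks = splitter.split_text(fh.read())

docs = [Document(page_content=c, metadata={"source": "wikitop10.txt"}) for c in chunks]
store = FAISS.from_documents(docs, embeddings)

# Same score handling as retrieve_relevant_chunks(): FAISS returns a distance, app.py reports 1 - score.
for doc, score in store.similarity_search_with_score("Which article ranks first?", k=3):
    print(f"{doc.metadata['source']}  score={1 - score:.3f}  {doc.page_content[:80]!r}")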