Spaces:
Runtime error
Runtime error
PiotrtWitek committed on
Commit ·
3edfced
1
Parent(s): 32418e0
Dodanie aplikacji Szuflada
Browse files
app.py
CHANGED
|
@@ -1,265 +1,62 @@
|
|
| 1 |
-
# Wersja zmodyfikowana przez ChatGPT.
|
| 2 |
-
# Tworzenie przy starcie aplikacji bazy tylko wtedy, gdy jej nie ma.
|
| 3 |
-
import os
|
| 4 |
import sys
|
| 5 |
import uuid
|
| 6 |
-
import time
|
| 7 |
-
from typing import List, Dict, Any, Optional, Tuple
|
| 8 |
|
| 9 |
import gradio as gr
|
| 10 |
|
| 11 |
-
|
| 12 |
-
# - load_existing_database(persist_dir: str) -> obiekt bazy/collection lub None
|
| 13 |
-
# - update_database(persist_dir: str, progress_cb: callable | None = None) -> obiekt bazy/collection (po aktualizacji)
|
| 14 |
-
# - initialize_database(persist_dir: str = "data/chroma",
|
| 15 |
-
# batch_size: int = 64,
|
| 16 |
-
# persist_every: int = 200,
|
| 17 |
-
# progress_cb: callable | None = None) -> obiekt bazy/collection
|
| 18 |
-
from database_setup import initialize_database # wymagane
|
| 19 |
-
try:
|
| 20 |
-
from database_setup import load_existing_database # opcjonalne, ale zalecane
|
| 21 |
-
except Exception:
|
| 22 |
-
load_existing_database = None # type: ignore
|
| 23 |
-
try:
|
| 24 |
-
from database_setup import update_database # opcjonalne, ale zalecane
|
| 25 |
-
except Exception:
|
| 26 |
-
update_database = None # type: ignore
|
| 27 |
-
|
| 28 |
from chat_utils import create_rag_chain, format_sources, create_session_history_manager
|
| 29 |
from langchain_core.runnables.history import RunnableWithMessageHistory
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
rag_chain =
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
size /= 1024
|
| 50 |
-
return f"{size:.1f} PB"
|
| 51 |
-
|
| 52 |
-
def db_disk_stats(persist_dir: str) -> Tuple[int,int]:
|
| 53 |
-
"""Zwraca (liczba_plików, suma_rozmiarów_bajtów) dla katalogu bazy."""
|
| 54 |
-
if not os.path.isdir(persist_dir):
|
| 55 |
-
return 0, 0
|
| 56 |
-
total = 0
|
| 57 |
-
files = 0
|
| 58 |
-
for root, _, filenames in os.walk(persist_dir):
|
| 59 |
-
for fn in filenames:
|
| 60 |
-
files += 1
|
| 61 |
-
fp = os.path.join(root, fn)
|
| 62 |
-
try:
|
| 63 |
-
total += os.path.getsize(fp)
|
| 64 |
-
except Exception:
|
| 65 |
-
pass
|
| 66 |
-
return files, total
|
| 67 |
-
|
| 68 |
-
def is_database_empty(db) -> bool:
|
| 69 |
-
"""True jeśli baza nie istnieje lub nie zawiera żadnych rekordów."""
|
| 70 |
-
try:
|
| 71 |
-
if db is None:
|
| 72 |
-
return True
|
| 73 |
-
if hasattr(db, "count"):
|
| 74 |
-
return db.count() == 0
|
| 75 |
-
if hasattr(db, "get"):
|
| 76 |
-
res = db.get(limit=1)
|
| 77 |
-
if isinstance(res, dict):
|
| 78 |
-
ids = res.get("ids") or []
|
| 79 |
-
return len(ids) == 0
|
| 80 |
-
return False
|
| 81 |
-
except Exception:
|
| 82 |
-
return True
|
| 83 |
-
|
| 84 |
-
def attach_rag_chain(db) -> None:
|
| 85 |
-
"""Tworzy/aktualizuje globalny łańcuch RAG po zmianie bazy."""
|
| 86 |
-
global baza, rag_chain, conversational_rag_chain
|
| 87 |
-
baza = db
|
| 88 |
-
get_session_history = create_session_history_manager()
|
| 89 |
-
rag_chain = create_rag_chain(baza)
|
| 90 |
-
conversational_rag_chain = RunnableWithMessageHistory(
|
| 91 |
-
rag_chain,
|
| 92 |
-
get_session_history,
|
| 93 |
-
input_messages_key="input",
|
| 94 |
-
history_messages_key="chat_history",
|
| 95 |
-
output_messages_key="answer",
|
| 96 |
-
)
|
| 97 |
-
log("RAG chain gotowy.")
|
| 98 |
-
|
| 99 |
-
def load_db_if_exists() -> Optional[Any]:
|
| 100 |
-
"""Wczytuje istniejącą bazę, jeśli jest funkcja load_existing_database, inaczej None."""
|
| 101 |
-
if load_existing_database is None:
|
| 102 |
-
log("Brak load_existing_database – pomijam wczytywanie.")
|
| 103 |
-
return None
|
| 104 |
try:
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
log("Baza została wczytana.")
|
| 109 |
-
return db
|
| 110 |
-
except Exception as e:
|
| 111 |
-
log(f"Nie udało się wczytać bazy: {e}")
|
| 112 |
-
return None
|
| 113 |
-
|
| 114 |
-
# ---------- Funkcje do UI: status oraz inicjalizacja/aktualizacja ----------
|
| 115 |
-
|
| 116 |
-
def get_db_status() -> str:
|
| 117 |
-
"""Zwraca opis stanu bazy do wyświetlenia w UI."""
|
| 118 |
-
db = load_db_if_exists()
|
| 119 |
-
empty = is_database_empty(db)
|
| 120 |
-
file_count, total_bytes = db_disk_stats(PERSIST_DIR)
|
| 121 |
-
size_txt = human_size(total_bytes)
|
| 122 |
-
lines = []
|
| 123 |
-
lines.append(f"Katalog persist: {PERSIST_DIR}")
|
| 124 |
-
lines.append(f"Plików w katalogu: {file_count}, łączny rozmiar: {size_txt}")
|
| 125 |
-
if db is None:
|
| 126 |
-
lines.append("Stan bazy: brak lub nie udało się wczytać.")
|
| 127 |
-
else:
|
| 128 |
-
lines.append("Stan bazy: wczytana.")
|
| 129 |
-
lines.append(f"Czy pusta: {'tak' if empty else 'nie'}")
|
| 130 |
-
if not empty and hasattr(db, 'count'):
|
| 131 |
-
try:
|
| 132 |
-
lines.append(f"Liczba rekordów: {db.count()}")
|
| 133 |
-
except Exception:
|
| 134 |
-
pass
|
| 135 |
-
# Jeżeli baza jest wczytana i niepusta, zadbaj, by łańcuch był aktywny
|
| 136 |
-
if db and not empty:
|
| 137 |
-
attach_rag_chain(db)
|
| 138 |
-
return "\n".join(lines)
|
| 139 |
-
|
| 140 |
-
def init_or_update_database(progress=gr.Progress(track_tqdm=False)) -> str:
|
| 141 |
-
"""
|
| 142 |
-
Ręczna inicjalizacja lub aktualizacja bazy z przycisku.
|
| 143 |
-
Wyświetla krok po kroku, bez blokowania czatu.
|
| 144 |
-
"""
|
| 145 |
-
progress(0, desc="Start procesu")
|
| 146 |
-
db = load_db_if_exists()
|
| 147 |
-
if db is None or is_database_empty(db):
|
| 148 |
-
progress(0.05, desc="Budowanie bazy od zera")
|
| 149 |
-
try:
|
| 150 |
-
# initialize_database może przyjąć progress_cb, ale nie wymagamy tego
|
| 151 |
-
db = initialize_database(persist_dir=PERSIST_DIR) # type: ignore
|
| 152 |
-
progress(0.75, desc="Zapis i przygotowanie RAG")
|
| 153 |
-
attach_rag_chain(db)
|
| 154 |
-
progress(1.0, desc="Zakończono: baza zbudowana")
|
| 155 |
-
return "Baza zbudowana i gotowa."
|
| 156 |
-
except TypeError:
|
| 157 |
-
db = initialize_database() # type: ignore
|
| 158 |
-
attach_rag_chain(db)
|
| 159 |
-
progress(1.0, desc="Zakończono: baza zbudowana (bez persist_dir)")
|
| 160 |
-
return "Baza zbudowana i gotowa (initialize_database bez persist_dir)."
|
| 161 |
-
except Exception as e:
|
| 162 |
-
progress(1.0, desc="Błąd budowy")
|
| 163 |
-
return f"Błąd podczas budowania bazy: {e}"
|
| 164 |
-
else:
|
| 165 |
-
if AUTO_REFRESH_DB:
|
| 166 |
-
progress(0.1, desc="Aktualizacja istniejącej bazy")
|
| 167 |
-
try:
|
| 168 |
-
if update_database:
|
| 169 |
-
db = update_database(PERSIST_DIR) # type: ignore
|
| 170 |
-
progress(0.8, desc="Aktualizacja zakończona, odświeżam RAG")
|
| 171 |
-
attach_rag_chain(db)
|
| 172 |
-
progress(1.0, desc="Gotowe")
|
| 173 |
-
return "Baza zaktualizowana i gotowa."
|
| 174 |
-
else:
|
| 175 |
-
# fallback – spróbuj initialize_database jako „update”, jeśli wspiera
|
| 176 |
-
try:
|
| 177 |
-
db = initialize_database(persist_dir=PERSIST_DIR, mode="update") # type: ignore
|
| 178 |
-
attach_rag_chain(db)
|
| 179 |
-
progress(1.0, desc="Gotowe")
|
| 180 |
-
return "Baza zaktualizowana przez initialize_database(mode='update')."
|
| 181 |
-
except TypeError:
|
| 182 |
-
progress(1.0, desc="Pominięto")
|
| 183 |
-
return "Brak update_database i brak wsparcia mode='update' – pomijam aktualizację."
|
| 184 |
-
except Exception as e:
|
| 185 |
-
progress(1.0, desc="Błąd aktualizacji")
|
| 186 |
-
return f"Błąd podczas aktualizacji bazy: {e}"
|
| 187 |
-
else:
|
| 188 |
-
progress(1.0, desc="Pominięto")
|
| 189 |
-
return "AUTO_REFRESH_DB=false – aktualizacja pominięta. Baza została tylko wczytana."
|
| 190 |
-
|
| 191 |
-
# ---------- Obsługa czatu ----------
|
| 192 |
-
|
| 193 |
-
def respond(user_input: str, messages: List[Dict[str, Any]] | None, sess_id: str | None):
|
| 194 |
-
"""
|
| 195 |
-
Obsługuje odpowiedź czatu.
|
| 196 |
-
Jeśli baza niegotowa/pusta – nie wisi, tylko prosi o inicjalizację z panelu.
|
| 197 |
-
"""
|
| 198 |
-
if messages is None:
|
| 199 |
-
messages = []
|
| 200 |
-
messages.append({"role": "user", "content": user_input})
|
| 201 |
-
|
| 202 |
-
sid = sess_id if isinstance(sess_id, str) and sess_id else str(uuid.uuid4())
|
| 203 |
-
|
| 204 |
-
# Jeżeli łańcuch nie jest gotowy, spróbuj wczytać bazę i tylko podpiąć RAG – bez budowy.
|
| 205 |
-
global conversational_rag_chain
|
| 206 |
-
if conversational_rag_chain is None:
|
| 207 |
-
db = load_db_if_exists()
|
| 208 |
-
if db and not is_database_empty(db):
|
| 209 |
-
attach_rag_chain(db)
|
| 210 |
-
else:
|
| 211 |
-
messages.append({
|
| 212 |
-
"role": "assistant",
|
| 213 |
-
"content": (
|
| 214 |
-
"Baza nie jest jeszcze gotowa. Wejdź w sekcję „Zarządzanie bazą” i wybierz: "
|
| 215 |
-
"Zainicjalizuj lub zaktualizuj bazę. "
|
| 216 |
-
"Po zakończeniu wróć do czatu i zadaj pytanie ponownie."
|
| 217 |
-
)
|
| 218 |
-
})
|
| 219 |
-
return messages
|
| 220 |
-
|
| 221 |
-
# W tym miejscu zakładamy, że RAG jest już gotowy
|
| 222 |
-
try:
|
| 223 |
-
assert conversational_rag_chain is not None
|
| 224 |
-
start_t = time.time()
|
| 225 |
-
result = conversational_rag_chain.invoke(
|
| 226 |
-
{"input": user_input},
|
| 227 |
-
config={"configurable": {"session_id": sid}},
|
| 228 |
)
|
| 229 |
-
elapsed = time.time() - start_t
|
| 230 |
-
log(f"RAG inference: {elapsed:.2f}s")
|
| 231 |
except Exception as e:
|
| 232 |
-
|
| 233 |
-
return
|
| 234 |
|
| 235 |
-
context_docs =
|
|
|
|
| 236 |
try:
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
for i, (doc, score) in enumerate(debug_scores):
|
| 241 |
-
print(f"Chunk {i+1}: similarity_score={score}, title={doc.metadata.get('title')}", flush=True)
|
| 242 |
except Exception:
|
| 243 |
pass
|
| 244 |
|
| 245 |
sources_md = format_sources(context_docs)
|
| 246 |
-
answer =
|
| 247 |
-
answer_with_sources = f"{answer}\n\
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
return messages
|
| 251 |
-
|
| 252 |
-
# ---------- UI ----------
|
| 253 |
|
| 254 |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="Szuflada Chatbot") as demo:
|
| 255 |
-
session_id = gr.State(str(uuid.uuid4()))
|
| 256 |
|
| 257 |
gr.Markdown(
|
| 258 |
"# Czat z Moją Szufladą\n"
|
| 259 |
-
"### Zadaj pytanie na temat treści ze strony mojaszuflada.pl"
|
| 260 |
)
|
| 261 |
-
|
| 262 |
-
chatbot = gr.Chatbot(label="Rozmowa", height=500, type="messages")
|
| 263 |
|
| 264 |
with gr.Row():
|
| 265 |
msg = gr.Textbox(
|
|
@@ -275,18 +72,6 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="Szuflada Chatbot
|
|
| 275 |
msg.submit(respond, [msg, chatbot, session_id], [chatbot]) \
|
| 276 |
.then(lambda: gr.update(value=""), None, [msg], queue=False)
|
| 277 |
|
| 278 |
-
with gr.Accordion("Zarządzanie bazą", open=False):
|
| 279 |
-
status_btn = gr.Button("Sprawdź status bazy")
|
| 280 |
-
init_btn = gr.Button("Zainicjalizuj lub zaktualizuj bazę", variant="primary")
|
| 281 |
-
status_out = gr.Textbox(label="Status", lines=8)
|
| 282 |
-
init_out = gr.Textbox(label="Wynik inicjalizacji/aktualizacji", lines=6)
|
| 283 |
-
|
| 284 |
-
status_btn.click(fn=get_db_status, inputs=None, outputs=status_out)
|
| 285 |
-
init_btn.click(fn=init_or_update_database, inputs=None, outputs=init_out) \
|
| 286 |
-
.then(fn=get_db_status, inputs=None, outputs=status_out)
|
| 287 |
-
|
| 288 |
if __name__ == "__main__":
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
server_port=int(os.getenv("PORT", 7860))
|
| 292 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import sys
|
| 2 |
import uuid
|
|
|
|
|
|
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
|
| 6 |
+
from database_setup import initialize_database
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
from chat_utils import create_rag_chain, format_sources, create_session_history_manager
|
| 8 |
from langchain_core.runnables.history import RunnableWithMessageHistory
|
| 9 |
|
| 10 |
+
# Start-up: initialise the database once at import time. The objects created
# here are module-level globals reused by every chat request.
# flush=True so the message reaches the process log before the (possibly long)
# initialisation call returns.
print("Inicjalizacja bazy danych...", flush=True)
baza = initialize_database()
if baza is None:
    # Nothing to retrieve from, so abort start-up. The error is written to
    # stderr and flushed so it is not lost when the interpreter exits.
    print(
        "Nie udało się zainicjalizować bazy danych. Zakończenie pracy.",
        file=sys.stderr,
        flush=True,
    )
    sys.exit(1)

# Chain built on top of the initialised database.
rag_chain = create_rag_chain(baza)
# Callable handed to RunnableWithMessageHistory as its history factory.
get_session_history = create_session_history_manager()
# History-aware wrapper: respond() sends the user text under "input" and reads
# the reply from "answer"; past turns are injected under "chat_history".
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)
|
| 25 |
+
|
| 26 |
+
def respond(message, chat_history, sess_id):
    """Answer one user message and return the updated chat history.

    Args:
        message: Text typed by the user.
        chat_history: List of ``(user_message, bot_message)`` pairs shown in
            the chat widget; ``None`` is treated as an empty history.
        sess_id: Conversation identifier forwarded to the chain as
            ``session_id``. A random UUID is used when it is missing, empty
            or not a string.

    Returns:
        The history list with the new exchange appended. A blank message
        returns the history unchanged. Errors raised by the chain are
        reported as the bot reply instead of propagating.
    """
    if chat_history is None:
        chat_history = []
    if not message or not message.strip():
        # Nothing to ask: skip the chain call and leave the history untouched.
        return chat_history

    # Never hand None / "" to the history lookup; fall back to a fresh id.
    session = sess_id if isinstance(sess_id, str) and sess_id else str(uuid.uuid4())

    try:
        response = conversational_rag_chain.invoke(
            {"input": message},
            config={"configurable": {"session_id": session}},
        )
    except Exception as e:
        chat_history.append((message, f"Błąd podczas przetwarzania: {e}"))
        return chat_history

    context_docs = response.get("context") or []

    # Debug: print the similarity score of each retrieved chunk. Skipped when
    # nothing was retrieved, because a search with k=0 is meaningless.
    if context_docs:
        try:
            debug_scores = baza.similarity_search_with_score(message, k=len(context_docs))
            for i, (doc, score) in enumerate(debug_scores, start=1):
                print(
                    f"Chunk {i}: similarity_score={score}, title={doc.metadata.get('title')}",
                    flush=True,
                )
        except Exception as e:
            # Diagnostics are best-effort: report the failure, never break the reply.
            print(f"Debug similarity scores unavailable: {e}", flush=True)

    sources_md = format_sources(context_docs)
    answer = response.get("answer") or ""
    answer_with_sources = f"{answer}\n\n**Źródła:**\n{sources_md}"
    chat_history.append((message, answer_with_sources))
    return chat_history
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="Szuflada Chatbot") as demo:
|
| 53 |
+
session_id = gr.State(lambda: str(uuid.uuid4()))
|
| 54 |
|
| 55 |
gr.Markdown(
|
| 56 |
"# Czat z Moją Szufladą\n"
|
| 57 |
+
"### Zadaj pytanie na temat treści ze strony [mojaszuflada.pl](https://mojaszuflada.pl)"
|
| 58 |
)
|
| 59 |
+
chatbot = gr.Chatbot(label="Rozmowa", height=500)
|
|
|
|
| 60 |
|
| 61 |
with gr.Row():
|
| 62 |
msg = gr.Textbox(
|
|
|
|
| 72 |
msg.submit(respond, [msg, chatbot, session_id], [chatbot]) \
|
| 73 |
.then(lambda: gr.update(value=""), None, [msg], queue=False)
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
if __name__ == "__main__":
    import os

    # Bind to loopback by default for local runs, but let the hosting
    # environment override the address through GRADIO_SERVER_NAME: an
    # explicit server_name argument would otherwise take precedence over
    # that variable.
    demo.launch(
        server_name=os.getenv("GRADIO_SERVER_NAME", "127.0.0.1"),
        inbrowser=False,
        share=False,
    )
|
|
|
|
|
|