RedJul2110 committed on
Commit
a1ca032
·
verified ·
1 Parent(s): 339ab2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +397 -57
app.py CHANGED
@@ -16,7 +16,7 @@ from difflib import SequenceMatcher
16
 
17
  import torch
18
  import gradio as gr
19
- from transformers import AutoTokenizer, AutoModelForCausalLM
20
  from huggingface_hub import hf_hub_download, upload_file
21
 
22
  try:
@@ -29,7 +29,7 @@ except Exception:
29
  # =========================================================
30
 
31
  # --- MODELL ---
32
- MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
33
  # Beispiele:
34
  # "Qwen/Qwen3-0.6B" → sehr klein, schnell, schwächer
35
  # "Qwen/Qwen3-1.7B" → gute Balance (empfohlen)
@@ -50,6 +50,7 @@ os.makedirs(DATA_DIR, exist_ok=True)
50
  WISSEN_FILE = os.path.join(DATA_DIR, "wissen.json")
51
  CHAT_FILE = os.path.join(DATA_DIR, "chat_history.json")
52
  LOG_FILE = os.path.join(DATA_DIR, "ai_log.txt")
 
53
 
54
  # --- ANTWORTLÄNGE (wie lang darf die KI antworten?) ---
55
  MAX_NEW_TOKENS_CHAT = 80 # Normale Chat-Antwort
@@ -87,6 +88,7 @@ MAX_CONTEXT_TURNS = 2 # Wie viele Nachrichten als Kontext genutzt werden
87
  # --- KI-PERSÖNLICHKEIT ---
88
  AI_NAME = "RedJul2110"
89
  FALLBACK_NO_INFO = "Dazu habe ich gerade keine sichere Antwort."
 
90
 
91
  # --- WISSENSDATENBANK ---
92
  USE_QWEN_POLISH = True # True = KI verfeinert Antworten mit Wissen
@@ -103,6 +105,8 @@ DB_FACT_MATCH_THRESHOLD = 0.70 # Ab wann gilt ein Fakt als passend? (0.0–1.
103
  # =========================================================
104
  model = None
105
  tokenizer = None
 
 
106
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
107
 
108
  knowledge_lock = threading.Lock()
@@ -117,6 +121,42 @@ letzte_wissensänderung = None
117
  letzte_api_latenz = None
118
  letzter_fehler = None
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  # =========================================================
121
  # HILFSFUNKTIONEN
122
  # =========================================================
@@ -349,7 +389,47 @@ def upload_wissen_background():
349
  finally:
350
  upload_in_progress = False
351
 
352
- def db_match_score(query, item):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  q_norm = normalize_text(query)
354
  frage = normalize_text(item.get("frage", ""))
355
  antwort = item.get("antwort", "")
@@ -365,6 +445,11 @@ def db_match_score(query, item):
365
  c_tokens = text_tokens(blob_norm)
366
  token_score = len(q_tokens & c_tokens) / max(len(q_tokens), 1)
367
 
 
 
 
 
 
368
  bonus = 0.0
369
  if q_norm == frage:
370
  bonus += 0.35
@@ -375,7 +460,7 @@ def db_match_score(query, item):
375
  if kategorie and q_norm == kategorie:
376
  bonus += 0.08
377
 
378
- return (seq * 0.6) + (token_score * 0.4) + bonus
379
 
380
  def exact_db_answer(user_message):
381
  q = normalize_text(user_message)
@@ -404,9 +489,10 @@ def best_db_answer(user_message, threshold=DB_DIRECT_MATCH_THRESHOLD):
404
 
405
  best_item = None
406
  best_score = 0.0
 
407
 
408
  for item in data:
409
- score = db_match_score(user_message, item)
410
  if score > best_score:
411
  best_score = score
412
  best_item = item
@@ -422,8 +508,9 @@ def find_relevant_facts(query, max_items=6, min_score=DB_FACT_MATCH_THRESHOLD):
422
  return []
423
 
424
  scored = []
 
425
  for item in data:
426
- score = db_match_score(query, item)
427
  if score >= min_score:
428
  scored.append((score, item))
429
 
@@ -452,8 +539,9 @@ def search_knowledge(query, max_results=8):
452
  return "Keine Einträge vorhanden."
453
 
454
  scored = []
 
455
  for item in data:
456
- score = db_match_score(query, item)
457
  if score >= DB_FACT_MATCH_THRESHOLD:
458
  scored.append((score, item))
459
 
@@ -514,7 +602,7 @@ def delete_all_knowledge(admin_code):
514
  threading.Thread(target=upload_wissen_background, daemon=True).start()
515
  return True, "✅ Alle Wissenseinträge wurden gelöscht."
516
 
517
- def save_knowledge_entry(frage, antwort, kategorie="", quelle=""):
518
  global letzte_wissensänderung
519
 
520
  frage = (frage or "").strip()
@@ -535,11 +623,15 @@ def save_knowledge_entry(frage, antwort, kategorie="", quelle=""):
535
  if normalize_text(item.get("frage", "")) == q_norm:
536
  return False, "ℹ️ Dieser Eintrag ist schon vorhanden."
537
 
 
 
 
538
  entry = {
539
  "frage": frage,
540
  "antwort": antwort,
541
  "kategorie": kategorie,
542
  "quelle": quelle,
 
543
  "created_at": now_str()
544
  }
545
  data.append(entry)
@@ -714,10 +806,23 @@ def load_chat_history():
714
  def save_chat_history(history):
715
  save_json_list(CHAT_FILE, history)
716
 
 
 
 
 
 
 
 
 
 
 
 
 
 
717
  def reset_chat_history():
718
  global api_chat_historie
719
  with chat_lock:
720
- api_chat_historie = []
721
  save_chat_history(api_chat_historie)
722
  log_line("[CHAT] Chat-Historie zurückgesetzt.")
723
  return True, "✅ Chat-Historie gelöscht."
@@ -878,6 +983,49 @@ def model_generate(messages_history, max_new_tokens=120, temperature=0.6, do_sam
878
 
879
  return text
880
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
881
  def build_system_prompt(user_message=""):
882
  facts = find_relevant_facts(user_message, max_items=6)
883
  if not facts:
@@ -1003,29 +1151,114 @@ def generate_reply(user_message, history_context=""):
1003
 
1004
  # Falls keine Fakten da sind oder das Polieren Mist war: Normaler Chat
1005
  return general_chat_reply(user_message, history_context)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1006
  # =========================================================
1007
  # API
1008
  # =========================================================
1009
- def gradio_simple_api(user_message):
1010
  global api_chat_historie, letzte_api_latenz
1011
 
1012
  start = time.perf_counter()
 
1013
 
1014
  with chat_lock:
1015
- history_context = history_to_context(api_chat_historie)
 
1016
  reply = generate_reply(user_message, history_context=history_context)
1017
 
1018
- api_chat_historie.append({"role": "user", "content": user_message})
1019
- api_chat_historie.append({"role": "assistant", "content": reply})
1020
- trim_api_history(10)
 
 
1021
  save_chat_history(api_chat_historie)
1022
 
1023
- log_line(f"[USER] {user_message}")
1024
  log_line(f"[ASSISTANT] {reply}")
1025
 
1026
  letzte_api_latenz = f"{(time.perf_counter() - start) * 1000:.2f} ms"
1027
  return reply
1028
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1029
  # =========================================================
1030
  # UI FUNKTIONEN
1031
  # =========================================================
@@ -1066,11 +1299,65 @@ def ui_web_lernen(passwort, frage, antwort, kategorie):
1066
  ok, msg = save_knowledge_entry(frage, antwort, kategorie)
1067
  return msg
1068
 
1069
- def ui_link_lernen(passwort, url, thema, kategorie):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1070
  if passwort != ADMIN_CODE:
1071
  return "❌ Zugriff verweigert! Falscher Admin-Code."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1072
 
1073
- ok, msg = save_link_as_knowledge(url, thema, kategorie)
1074
  return msg
1075
 
1076
  def ui_wissen_suchen(suchbegriff):
@@ -1134,22 +1421,38 @@ def load_visible_chat_history_for_ui():
1134
  # APP
1135
  # =========================================================
1136
  def erzeuge_gradio_app():
1137
- with gr.Blocks(title="Privates KI Kontrollzentrum") as demo:
 
 
 
 
 
 
 
1138
  hidden_msg = gr.Textbox(value="", visible=False)
 
1139
  hidden_out = gr.Textbox(value="", visible=False)
1140
  api_trigger = gr.Button(visible=False)
1141
 
1142
  api_trigger.click(
1143
  gradio_simple_api,
1144
- inputs=[hidden_msg],
1145
  outputs=[hidden_out],
1146
  api_name="predict"
1147
  )
1148
 
 
 
 
 
 
 
 
 
1149
  gr.Markdown("# 🤖 Privates KI Kontrollzentrum")
1150
  gr.Markdown("Die KI nutzt zuerst ihre eigenen Antworten. Gefundene Fakten aus der Datenbank dienen nur als Zusatzwissen.")
1151
 
1152
- with gr.Tab("📊 Status"):
1153
  status_text = gr.Textbox(label="Systembericht", lines=16, interactive=False)
1154
  with gr.Row():
1155
  refresh_btn = gr.Button("Status aktualisieren")
@@ -1158,43 +1461,78 @@ def erzeuge_gradio_app():
1158
  sync_btn.click(ui_sync_wissen, outputs=status_text)
1159
  demo.load(ui_zeige_status, outputs=status_text)
1160
 
1161
- with gr.Tab("🧠 Lernen (Admin)"):
1162
- gr.Markdown("Hier speicherst du neue Fakten in die Datenbank.")
1163
- pw_input = gr.Textbox(label="Geheimer Code", type="password")
1164
- k_input = gr.Textbox(label="Kategorie / Bereich (optional)", placeholder="z. B. Geschichte, Geo, Technik")
1165
- q_input = gr.Textbox(label="Thema / Stichwort", placeholder="z. B. Frankreich, Mars, Bundeskanzler")
1166
- a_input = gr.Textbox(label="Text", placeholder="Langer Infotext", lines=6)
1167
- lern_btn = gr.Button("Wissen speichern", variant="primary")
1168
- lern_out = gr.Textbox(label="Ergebnis", interactive=False)
1169
- lern_btn.click(ui_web_lernen, inputs=[pw_input, q_input, a_input, k_input], outputs=lern_out)
1170
-
1171
- with gr.Tab("🌐 Link lernen"):
1172
- gr.Markdown("Ein öffentlicher Link wird ausgelesen, zusammengefasst und als Wissen gespeichert.")
1173
- link_pw = gr.Textbox(label="Geheimer Code", type="password")
1174
- link_url = gr.Textbox(label="Link", placeholder="https://...")
1175
- link_topic = gr.Textbox(label="Thema / Stichwort (optional)", placeholder="z. B. Minecraft, Deutschland, KI")
1176
- link_cat = gr.Textbox(label="Kategorie / Bereich (optional)", placeholder="z. B. web, geschichte, technik")
1177
- link_btn = gr.Button("Link lernen", variant="primary")
1178
- link_out = gr.Textbox(label="Ergebnis", lines=8, interactive=False)
1179
- link_btn.click(ui_link_lernen, inputs=[link_pw, link_url, link_topic, link_cat], outputs=link_out)
1180
-
1181
- with gr.Tab("🔍 Suchen / Löschen"):
1182
- gr.Markdown("Suche in der Datenbank oder lösche Einträge wieder.")
1183
- search_box = gr.Textbox(label="Suchbegriff", placeholder="z. B. Frankreich")
1184
- search_btn = gr.Button("Suchen")
1185
- search_out = gr.Textbox(label="Treffer", lines=12, interactive=False)
1186
-
1187
- del_pw = gr.Textbox(label="Admin-Code", type="password")
1188
- del_box = gr.Textbox(label="Löschen nach Begriff", placeholder="z. B. Frankreich")
1189
- del_btn = gr.Button("Löschen", variant="secondary")
1190
- del_out = gr.Textbox(label="Lösch-Ergebnis", interactive=False)
1191
-
1192
- all_del_btn = gr.Button("ALLES löschen", variant="stop")
1193
- all_del_out = gr.Textbox(label="Alles löschen", interactive=False)
1194
-
1195
- search_btn.click(ui_wissen_suchen, inputs=[search_box], outputs=search_out)
1196
- del_btn.click(ui_wissen_loeschen, inputs=[del_pw, del_box], outputs=del_out)
1197
- all_del_btn.click(ui_wissen_alle_loeschen, inputs=[del_pw], outputs=all_del_out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1198
 
1199
  demo.queue(default_concurrency_limit=8)
1200
  return demo
@@ -1222,6 +1560,8 @@ def bootstrap():
1222
  ensure_json_list_file(WISSEN_FILE)
1223
  ensure_json_list_file(CHAT_FILE)
1224
 
 
 
1225
  sync_wissen_from_hf()
1226
  api_chat_historie = load_chat_history()
1227
 
 
16
 
17
  import torch
18
  import gradio as gr
19
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, AutoModel
20
  from huggingface_hub import hf_hub_download, upload_file
21
 
22
  try:
 
29
  # =========================================================
30
 
31
  # --- MODELL ---
32
+ MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"
33
  # Beispiele:
34
  # "Qwen/Qwen3-0.6B" → sehr klein, schnell, schwächer
35
  # "Qwen/Qwen3-1.7B" → gute Balance (empfohlen)
 
50
  WISSEN_FILE = os.path.join(DATA_DIR, "wissen.json")
51
  CHAT_FILE = os.path.join(DATA_DIR, "chat_history.json")
52
  LOG_FILE = os.path.join(DATA_DIR, "ai_log.txt")
53
+ SETTINGS_FILE = os.path.join(DATA_DIR, "settings.json")
54
 
55
  # --- ANTWORTLÄNGE (wie lang darf die KI antworten?) ---
56
  MAX_NEW_TOKENS_CHAT = 80 # Normale Chat-Antwort
 
88
  # --- KI-PERSÖNLICHKEIT ---
89
  AI_NAME = "RedJul2110"
90
  FALLBACK_NO_INFO = "Dazu habe ich gerade keine sichere Antwort."
91
+ SYSTEM_PROMPT_ZUSATZ = "Du bist ein brillanter KI-Assistent. Du kannst sehr gut programmieren und Code-Beispiele liefern."
92
 
93
  # --- WISSENSDATENBANK ---
94
  USE_QWEN_POLISH = True # True = KI verfeinert Antworten mit Wissen
 
105
  # =========================================================
106
  model = None
107
  tokenizer = None
108
+ embed_model = None
109
+ embed_tokenizer = None
110
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
111
 
112
  knowledge_lock = threading.Lock()
 
121
  letzte_api_latenz = None
122
  letzter_fehler = None
123
 
124
+ def load_settings():
125
+ global TEMPERATURE_CHAT, MAX_NEW_TOKENS_CHAT, AI_NAME, SYSTEM_PROMPT_ZUSATZ
126
+ if os.path.exists(SETTINGS_FILE):
127
+ try:
128
+ with open(SETTINGS_FILE, "r", encoding="utf-8") as f:
129
+ s = json.load(f)
130
+ TEMPERATURE_CHAT = s.get("temperature", TEMPERATURE_CHAT)
131
+ MAX_NEW_TOKENS_CHAT = s.get("max_tokens", MAX_NEW_TOKENS_CHAT)
132
+ AI_NAME = s.get("ai_name", AI_NAME)
133
+ SYSTEM_PROMPT_ZUSATZ = s.get("system_prompt", SYSTEM_PROMPT_ZUSATZ)
134
+ except Exception:
135
+ pass
136
+
137
+ def save_settings(pw, t, m, n, p):
138
+ global TEMPERATURE_CHAT, MAX_NEW_TOKENS_CHAT, AI_NAME, SYSTEM_PROMPT_ZUSATZ
139
+ if pw != ADMIN_CODE:
140
+ return "❌ Falscher Admin-Code."
141
+
142
+ try:
143
+ TEMPERATURE_CHAT = float(t)
144
+ MAX_NEW_TOKENS_CHAT = int(m)
145
+ AI_NAME = str(n).strip()
146
+ SYSTEM_PROMPT_ZUSATZ = str(p).strip()
147
+
148
+ s = {
149
+ "temperature": TEMPERATURE_CHAT,
150
+ "max_tokens": MAX_NEW_TOKENS_CHAT,
151
+ "ai_name": AI_NAME,
152
+ "system_prompt": SYSTEM_PROMPT_ZUSATZ
153
+ }
154
+ with open(SETTINGS_FILE, "w", encoding="utf-8") as f:
155
+ json.dump(s, f, indent=2)
156
+ return "✅ Einstellungen erfolgreich gespeichert!"
157
+ except Exception as e:
158
+ return f"❌ Fehler beim Speichern: {e}"
159
+
160
  # =========================================================
161
  # HILFSFUNKTIONEN
162
  # =========================================================
 
389
  finally:
390
  upload_in_progress = False
391
 
392
+ def init_embed_model():
393
+ global embed_tokenizer, embed_model
394
+ if embed_model is not None:
395
+ return
396
+ try:
397
+ log_line("[INFO] Lade Embedding-Modell für semantische Suche...")
398
+ embed_tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
399
+ embed_model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2").to(device)
400
+ embed_model.eval()
401
+ log_line("[INFO] Embedding-Modell erfolgreich geladen.")
402
+ except Exception as e:
403
+ log_error("init_embed_model", e)
404
+
405
+ def get_embedding(text):
406
+ init_embed_model()
407
+ if embed_model is None or embed_tokenizer is None or not text:
408
+ return []
409
+
410
+ try:
411
+ inputs = embed_tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
412
+ with torch.no_grad():
413
+ outputs = embed_model(**inputs)
414
+ # Mean Pooling
415
+ emb = outputs.last_hidden_state.mean(dim=1)
416
+ return emb[0].cpu().numpy().tolist()
417
+ except Exception as e:
418
+ log_error("get_embedding", e)
419
+ return []
420
+
421
def cosine_similarity(v1, v2):
    """Cosine similarity of two equal-length vectors.

    Returns 0.0 for empty vectors, mismatched lengths, or zero magnitude.
    """
    if not v1 or not v2 or len(v1) != len(v2):
        return 0.0
    dot = 0.0
    sq_a = 0.0
    sq_b = 0.0
    for a, b in zip(v1, v2):
        dot += a * b
        sq_a += a * a
        sq_b += b * b
    mag1 = sq_a ** 0.5
    mag2 = sq_b ** 0.5
    if mag1 == 0 or mag2 == 0:
        return 0.0
    return dot / (mag1 * mag2)
431
+
432
+ def db_match_score(query, item, query_emb=None):
433
  q_norm = normalize_text(query)
434
  frage = normalize_text(item.get("frage", ""))
435
  antwort = item.get("antwort", "")
 
445
  c_tokens = text_tokens(blob_norm)
446
  token_score = len(q_tokens & c_tokens) / max(len(q_tokens), 1)
447
 
448
+ semantic_score = 0.0
449
+ item_emb = item.get("embedding", [])
450
+ if query_emb and item_emb:
451
+ semantic_score = cosine_similarity(query_emb, item_emb)
452
+
453
  bonus = 0.0
454
  if q_norm == frage:
455
  bonus += 0.35
 
460
  if kategorie and q_norm == kategorie:
461
  bonus += 0.08
462
 
463
+ return max((seq * 0.6) + (token_score * 0.4) + bonus, semantic_score + bonus)
464
 
465
  def exact_db_answer(user_message):
466
  q = normalize_text(user_message)
 
489
 
490
  best_item = None
491
  best_score = 0.0
492
+ query_emb = get_embedding(user_message)
493
 
494
  for item in data:
495
+ score = db_match_score(user_message, item, query_emb=query_emb)
496
  if score > best_score:
497
  best_score = score
498
  best_item = item
 
508
  return []
509
 
510
  scored = []
511
+ query_emb = get_embedding(query)
512
  for item in data:
513
+ score = db_match_score(query, item, query_emb=query_emb)
514
  if score >= min_score:
515
  scored.append((score, item))
516
 
 
539
  return "Keine Einträge vorhanden."
540
 
541
  scored = []
542
+ query_emb = get_embedding(query)
543
  for item in data:
544
+ score = db_match_score(query, item, query_emb=query_emb)
545
  if score >= DB_FACT_MATCH_THRESHOLD:
546
  scored.append((score, item))
547
 
 
602
  threading.Thread(target=upload_wissen_background, daemon=True).start()
603
  return True, "✅ Alle Wissenseinträge wurden gelöscht."
604
 
605
+ def save_knowledge_entry(frage, antwort, kategorie="", quelle="", embedding=None):
606
  global letzte_wissensänderung
607
 
608
  frage = (frage or "").strip()
 
623
  if normalize_text(item.get("frage", "")) == q_norm:
624
  return False, "ℹ️ Dieser Eintrag ist schon vorhanden."
625
 
626
+ if embedding is None:
627
+ embedding = get_embedding(frage)
628
+
629
  entry = {
630
  "frage": frage,
631
  "antwort": antwort,
632
  "kategorie": kategorie,
633
  "quelle": quelle,
634
+ "embedding": embedding,
635
  "created_at": now_str()
636
  }
637
  data.append(entry)
 
806
  def save_chat_history(history):
807
  save_json_list(CHAT_FILE, history)
808
 
809
+ def get_chat_session(session_id="default"):
810
+ global api_chat_historie
811
+ if not isinstance(api_chat_historie, dict):
812
+ if isinstance(api_chat_historie, list):
813
+ api_chat_historie = {"default": api_chat_historie}
814
+ else:
815
+ api_chat_historie = {"default": []}
816
+
817
+ if session_id not in api_chat_historie:
818
+ api_chat_historie[session_id] = []
819
+
820
+ return api_chat_historie[session_id]
821
+
822
  def reset_chat_history():
823
  global api_chat_historie
824
  with chat_lock:
825
+ api_chat_historie = {"default": []}
826
  save_chat_history(api_chat_historie)
827
  log_line("[CHAT] Chat-Historie zurückgesetzt.")
828
  return True, "✅ Chat-Historie gelöscht."
 
983
 
984
  return text
985
 
986
+ def model_generate_stream(messages_history, max_new_tokens=120, temperature=0.6, do_sample=True):
987
+ if model is None or tokenizer is None:
988
+ yield "Modell nicht geladen."
989
+ return
990
+
991
+ prompt_text = format_messages_for_model(messages_history)
992
+ if not prompt_text:
993
+ return
994
+
995
+ inputs = tokenizer(
996
+ [prompt_text],
997
+ return_tensors="pt",
998
+ truncation=True,
999
+ max_length=2048
1000
+ ).to(device)
1001
+
1002
+ attention_mask = inputs.get("attention_mask", None)
1003
+ streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
1004
+
1005
+ generation_kwargs = {
1006
+ "input_ids": inputs.input_ids,
1007
+ "attention_mask": attention_mask,
1008
+ "max_new_tokens": max_new_tokens,
1009
+ "do_sample": do_sample,
1010
+ "temperature": temperature,
1011
+ "top_p": 0.90,
1012
+ "top_k": 40,
1013
+ "repetition_penalty": REPETITION_PENALTY,
1014
+ "no_repeat_ngram_size": NO_REPEAT_NGRAM_SIZE,
1015
+ "pad_token_id": tokenizer.eos_token_id,
1016
+ "eos_token_id": tokenizer.eos_token_id,
1017
+ "streamer": streamer
1018
+ }
1019
+
1020
+ thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
1021
+ thread.start()
1022
+
1023
+ generated_text = ""
1024
+ for new_text in streamer:
1025
+ generated_text += new_text
1026
+ yield generated_text.strip()
1027
+
1028
+
1029
  def build_system_prompt(user_message=""):
1030
  facts = find_relevant_facts(user_message, max_items=6)
1031
  if not facts:
 
1151
 
1152
  # Falls keine Fakten da sind oder das Polieren Mist war: Normaler Chat
1153
  return general_chat_reply(user_message, history_context)
1154
+
1155
+ def general_chat_reply_stream(user_message, history_context=""):
1156
+ if model is None or tokenizer is None:
1157
+ yield "Dazu habe ich gerade keine sichere Antwort."
1158
+ return
1159
+
1160
+ messages = [
1161
+ {
1162
+ "role": "system",
1163
+ "content": f"Du bist {AI_NAME}. Antworte immer auf Deutsch. Kurz, direkt, hilfreich. Keine Floskeln. Bei Unsicherheit: 'Ich bin nicht sicher, aber ich glaube...'"
1164
+ },
1165
+ {
1166
+ "role": "user",
1167
+ "content": user_message
1168
+ }
1169
+ ]
1170
+
1171
+ try:
1172
+ for chunk in model_generate_stream(messages, max_new_tokens=MAX_NEW_TOKENS_CHAT, temperature=TEMPERATURE_CHAT, do_sample=True):
1173
+ yield chunk or "Dazu habe ich gerade keine sichere Antwort."
1174
+ except Exception as e:
1175
+ log_error("general_chat_reply_stream", e)
1176
+ yield "Dazu habe ich gerade keine sichere Antwort."
1177
+
1178
+ def generate_reply_stream(user_message, history_context=""):
1179
+ query = f"{user_message} {history_context}".strip()
1180
+
1181
+ facts = find_relevant_facts(query, max_items=6)
1182
+ facts = dedupe_facts(facts)
1183
+
1184
+ exact = exact_db_answer(user_message)
1185
+ if exact and not is_generic_or_placeholder_answer(exact):
1186
+ extra_fact = {
1187
+ "frage": user_message,
1188
+ "antwort": exact,
1189
+ "kategorie": "",
1190
+ "quelle": "",
1191
+ "created_at": ""
1192
+ }
1193
+ facts = dedupe_facts([extra_fact] + facts)
1194
+
1195
+ draft = compose_draft_from_facts(facts)
1196
+ if facts and len(facts) > 0:
1197
+ reply = polish_with_model(user_message, draft, facts, history_context)
1198
+ if reply and not is_generic_or_placeholder_answer(reply):
1199
+ yield reply
1200
+ return
1201
+
1202
+ # Normaler Chat Stream
1203
+ for chunk in general_chat_reply_stream(user_message, history_context):
1204
+ yield chunk
1205
  # =========================================================
1206
  # API
1207
  # =========================================================
1208
+ def gradio_simple_api(user_message, session_id="default"):
1209
  global api_chat_historie, letzte_api_latenz
1210
 
1211
  start = time.perf_counter()
1212
+ session_id = session_id or "default"
1213
 
1214
  with chat_lock:
1215
+ history = get_chat_session(session_id)
1216
+ history_context = history_to_context(history)
1217
  reply = generate_reply(user_message, history_context=history_context)
1218
 
1219
+ history.append({"role": "user", "content": user_message})
1220
+ history.append({"role": "assistant", "content": reply})
1221
+ if len(history) > MAX_CHAT_HISTORY:
1222
+ api_chat_historie[session_id] = history[-MAX_CHAT_HISTORY:]
1223
+
1224
  save_chat_history(api_chat_historie)
1225
 
1226
+ log_line(f"[USER] {user_message} (Session: {session_id})")
1227
  log_line(f"[ASSISTANT] {reply}")
1228
 
1229
  letzte_api_latenz = f"{(time.perf_counter() - start) * 1000:.2f} ms"
1230
  return reply
1231
 
1232
+ def gradio_stream_api(user_message, session_id="default"):
1233
+ global api_chat_historie, letzte_api_latenz
1234
+
1235
+ start = time.perf_counter()
1236
+ session_id = session_id or "default"
1237
+
1238
+ with chat_lock:
1239
+ history = get_chat_session(session_id)
1240
+ history_context = history_to_context(history)
1241
+
1242
+ reply = ""
1243
+ for chunk in generate_reply_stream(user_message, history_context=history_context):
1244
+ reply = chunk
1245
+ yield reply
1246
+
1247
+ with chat_lock:
1248
+ history = get_chat_session(session_id)
1249
+ history.append({"role": "user", "content": user_message})
1250
+ history.append({"role": "assistant", "content": reply})
1251
+ if len(history) > MAX_CHAT_HISTORY:
1252
+ api_chat_historie[session_id] = history[-MAX_CHAT_HISTORY:]
1253
+
1254
+ save_chat_history(api_chat_historie)
1255
+
1256
+ log_line(f"[USER] {user_message} (Session: {session_id})")
1257
+ log_line(f"[ASSISTANT] {reply}")
1258
+
1259
+ letzte_api_latenz = f"{(time.perf_counter() - start) * 1000:.2f} ms"
1260
+
1261
+
1262
  # =========================================================
1263
  # UI FUNKTIONEN
1264
  # =========================================================
 
1299
  ok, msg = save_knowledge_entry(frage, antwort, kategorie)
1300
  return msg
1301
 
1302
+ def ui_link_lernen_multi(passwort, urls_text, thema, kategorie):
1303
+ if passwort != ADMIN_CODE:
1304
+ return "❌ Zugriff verweigert! Falscher Admin-Code."
1305
+
1306
+ urls = [u.strip() for u in urls_text.replace(",", "\n").split("\n") if u.strip()]
1307
+ if not urls:
1308
+ return "❌ Keine gültigen URLs gefunden."
1309
+
1310
+ results = []
1311
+ for u in urls:
1312
+ ok, msg = save_link_as_knowledge(u, thema, kategorie)
1313
+ results.append(f"[{u}]: {msg}")
1314
+
1315
+ return "\n\n".join(results)
1316
+
1317
def extract_pdf_text_fallback(filepath):
    """Extract all page text from a PDF via the optional PyPDF2 dependency.

    Returns the stripped text, the sentinel "ERROR_NO_PYPDF2" when PyPDF2 is
    not installed, or "ERROR_READ: ..." on any other failure.
    """
    try:
        import PyPDF2
        parts = []
        with open(filepath, "rb") as fh:
            for page in PyPDF2.PdfReader(fh).pages:
                extracted = page.extract_text()
                if extracted:
                    parts.append(extracted)
                    parts.append("\n")
        return "".join(parts).strip()
    except ImportError:
        return "ERROR_NO_PYPDF2"
    except Exception as e:
        return f"ERROR_READ: {e}"
1332
+
1333
+ def ui_pdf_lernen(passwort, file_obj, thema, kategorie):
1334
  if passwort != ADMIN_CODE:
1335
  return "❌ Zugriff verweigert! Falscher Admin-Code."
1336
+
1337
+ if file_obj is None:
1338
+ return "❌ Bitte eine Datei hochladen."
1339
+
1340
+ filepath = file_obj if type(file_obj) is str else getattr(file_obj, "name", None)
1341
+ if not filepath:
1342
+ return "❌ Dateipfad konnte nicht ermittelt werden."
1343
+
1344
+ text = extract_pdf_text_fallback(filepath)
1345
+ if text == "ERROR_NO_PYPDF2":
1346
+ return "❌ Das Paket 'PyPDF2' fehlt. Bitte füge 'PyPDF2' zu deiner 'requirements.txt' in Hugging Face hinzu!"
1347
+ elif text.startswith("ERROR_READ:"):
1348
+ return f"❌ Fehler beim Lesen: {text}"
1349
+
1350
+ if len(text) < 50:
1351
+ return "❌ In der PDF konnte kein/kaum Text gefunden werden."
1352
+
1353
+ topic = thema or "PDF Dokument"
1354
+ cat = kategorie or "dokument"
1355
+
1356
+ summary = summarize_web_text(topic, text)
1357
+ if not summary or len(summary.strip()) < 30:
1358
+ summary = heuristic_summary(text, max_sentences=6, max_chars=3000)
1359
 
1360
+ ok, msg = save_knowledge_entry(frage=topic, antwort=summary, kategorie=cat, quelle="PDF Upload")
1361
  return msg
1362
 
1363
  def ui_wissen_suchen(suchbegriff):
 
1421
  # APP
1422
  # =========================================================
1423
  def erzeuge_gradio_app():
1424
+ custom_css = """
1425
+ body { background: linear-gradient(135deg, #0f2027, #203a43, #2c5364); color: #fff; font-family: 'Inter', sans-serif; }
1426
+ .gradio-container { background: rgba(255, 255, 255, 0.05); backdrop-filter: blur(15px); border-radius: 12px; border: 1px solid rgba(255,255,255,0.1); box-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.37); }
1427
+ button.primary { background: linear-gradient(90deg, #00C9FF 0%, #92FE9D 100%); border: none; color: black; font-weight: bold; }
1428
+ button.primary:hover { transform: translateY(-2px); box-shadow: 0 5px 15px rgba(0,201,255,0.4); }
1429
+ """
1430
+
1431
+ with gr.Blocks(title="Privates KI Kontrollzentrum", theme=gr.themes.Soft(), css=custom_css) as demo:
1432
  hidden_msg = gr.Textbox(value="", visible=False)
1433
+ hidden_session = gr.Textbox(value="default", visible=False)
1434
  hidden_out = gr.Textbox(value="", visible=False)
1435
  api_trigger = gr.Button(visible=False)
1436
 
1437
  api_trigger.click(
1438
  gradio_simple_api,
1439
+ inputs=[hidden_msg, hidden_session],
1440
  outputs=[hidden_out],
1441
  api_name="predict"
1442
  )
1443
 
1444
+ api_trigger_stream = gr.Button(visible=False)
1445
+ api_trigger_stream.click(
1446
+ gradio_stream_api,
1447
+ inputs=[hidden_msg, hidden_session],
1448
+ outputs=[hidden_out],
1449
+ api_name="stream"
1450
+ )
1451
+
1452
  gr.Markdown("# 🤖 Privates KI Kontrollzentrum")
1453
  gr.Markdown("Die KI nutzt zuerst ihre eigenen Antworten. Gefundene Fakten aus der Datenbank dienen nur als Zusatzwissen.")
1454
 
1455
+ with gr.Tab("📊 System Status"):
1456
  status_text = gr.Textbox(label="Systembericht", lines=16, interactive=False)
1457
  with gr.Row():
1458
  refresh_btn = gr.Button("Status aktualisieren")
 
1461
  sync_btn.click(ui_sync_wissen, outputs=status_text)
1462
  demo.load(ui_zeige_status, outputs=status_text)
1463
 
1464
+ with gr.Tab("🔒 Admin-Bereich"):
1465
+ login_col = gr.Column(visible=True)
1466
+ admin_col = gr.Column(visible=False)
1467
+
1468
+ with login_col:
1469
+ gr.Markdown("### Bitte Admin-Code eingeben, um Einstellungen und Lern-Tools freizuschalten.")
1470
+ admin_pw = gr.Textbox(label="Admin-Code", type="password")
1471
+ login_btn = gr.Button("Login", variant="primary")
1472
+ login_err = gr.Markdown(visible=False)
1473
+
1474
+ with admin_col:
1475
+ with gr.Tabs():
1476
+ with gr.Tab("🧠 Wissen lernen"):
1477
+ gr.Markdown("Speichere neue Fakten direkt in die Datenbank.")
1478
+ q_input = gr.Textbox(label="Thema / Stichwort", placeholder="z. B. Frankreich, Mars")
1479
+ a_input = gr.Textbox(label="Text", placeholder="Langer Infotext", lines=6)
1480
+ k_input = gr.Textbox(label="Kategorie / Bereich (optional)", placeholder="z. B. Geschichte")
1481
+ lern_btn = gr.Button("Wissen speichern", variant="primary")
1482
+ lern_out = gr.Textbox(label="Ergebnis", interactive=False)
1483
+ lern_btn.click(lambda q, a, k: ui_web_lernen(ADMIN_CODE, q, a, k), inputs=[q_input, a_input, k_input], outputs=lern_out)
1484
+
1485
+ with gr.Tab("🌐 Link lernen (Multi)"):
1486
+ gr.Markdown("Ein oder mehrere öffentliche Links einfügen (durch neue Zeile getrennt). Die KI liest und lernt diese.")
1487
+ link_urls = gr.Textbox(label="Links (Eine URL pro Zeile)", placeholder="https://...\nhttps://...", lines=5)
1488
+ link_topic = gr.Textbox(label="Thema / Stichwort (optional)")
1489
+ link_cat = gr.Textbox(label="Kategorie / Bereich (optional)")
1490
+ link_btn = gr.Button("Links lernen", variant="primary")
1491
+ link_out = gr.Textbox(label="Ergebnis", lines=8, interactive=False)
1492
+ link_btn.click(lambda u, t, c: ui_link_lernen_multi(ADMIN_CODE, u, t, c), inputs=[link_urls, link_topic, link_cat], outputs=link_out)
1493
+
1494
+ with gr.Tab("📄 PDF lernen"):
1495
+ gr.Markdown("Lade eine PDF-Datei hoch, um ihren Text zu analysieren und als Wissen zu speichern.")
1496
+ pdf_file = gr.File(label="PDF Datei", file_types=[".pdf"])
1497
+ pdf_topic = gr.Textbox(label="Thema / Stichwort (optional)")
1498
+ pdf_cat = gr.Textbox(label="Kategorie / Bereich (optional)")
1499
+ pdf_btn = gr.Button("Dokument lernen", variant="primary")
1500
+ pdf_out = gr.Textbox(label="Ergebnis", lines=6, interactive=False)
1501
+ pdf_btn.click(lambda f, t, c: ui_pdf_lernen(ADMIN_CODE, f, t, c), inputs=[pdf_file, pdf_topic, pdf_cat], outputs=pdf_out)
1502
+
1503
+ with gr.Tab("🔍 Suchen / Löschen"):
1504
+ gr.Markdown("Suche in der Datenbank oder lösche Einträge wieder.")
1505
+ search_box = gr.Textbox(label="Suchbegriff", placeholder="z. B. Frankreich")
1506
+ search_btn = gr.Button("Suchen")
1507
+ search_out = gr.Textbox(label="Treffer", lines=12, interactive=False)
1508
+
1509
+ del_box = gr.Textbox(label="Löschen nach Begriff", placeholder="z. B. Frankreich")
1510
+ del_btn = gr.Button("Löschen", variant="secondary")
1511
+ del_out = gr.Textbox(label="Lösch-Ergebnis", interactive=False)
1512
+
1513
+ all_del_btn = gr.Button("ALLES löschen", variant="stop")
1514
+ all_del_out = gr.Textbox(label="Alles löschen", interactive=False)
1515
+
1516
+ search_btn.click(ui_wissen_suchen, inputs=[search_box], outputs=search_out)
1517
+ del_btn.click(lambda d: ui_wissen_loeschen(ADMIN_CODE, d), inputs=[del_box], outputs=del_out)
1518
+ all_del_btn.click(lambda: ui_wissen_alle_loeschen(ADMIN_CODE), inputs=[], outputs=all_del_out)
1519
+
1520
+ with gr.Tab("⚙️ Einstellungen"):
1521
+ gr.Markdown("Konfiguriere das Verhalten der KI direkt ohne Neustart/Terminal.")
1522
+ set_temp = gr.Slider(0.01, 2.0, value=float(TEMPERATURE_CHAT), label="Kreativität (Temperature)")
1523
+ set_max = gr.Slider(10, 2048, step=1, value=int(MAX_NEW_TOKENS_CHAT), label="Max Antwort-Länge (Tokens)")
1524
+ set_name = gr.Textbox(value=str(AI_NAME), label="KI Name")
1525
+ set_prompt = gr.Textbox(value=str(SYSTEM_PROMPT_ZUSATZ), label="System Prompt Zusatz", lines=3)
1526
+ set_btn = gr.Button("Speichern", variant="primary")
1527
+ set_out = gr.Textbox(label="Ergebnis", interactive=False)
1528
+ set_btn.click(lambda t, m, n, p: save_settings(ADMIN_CODE, t, m, n, p), inputs=[set_temp, set_max, set_name, set_prompt], outputs=set_out)
1529
+
1530
+ def do_login(pw):
1531
+ if pw == ADMIN_CODE:
1532
+ return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
1533
+ return gr.update(visible=True), gr.update(visible=False), gr.update(value="**❌ Falscher Admin-Code**", visible=True)
1534
+
1535
+ login_btn.click(do_login, inputs=[admin_pw], outputs=[login_col, admin_col, login_err])
1536
 
1537
  demo.queue(default_concurrency_limit=8)
1538
  return demo
 
1560
  ensure_json_list_file(WISSEN_FILE)
1561
  ensure_json_list_file(CHAT_FILE)
1562
 
1563
+ load_settings()
1564
+
1565
  sync_wissen_from_hf()
1566
  api_chat_historie = load_chat_history()
1567