RedJul2110 committed on
Commit
a1ca032
·
verified ·
1 Parent(s): 339ab2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +397 -57
app.py CHANGED
@@ -16,7 +16,7 @@ from difflib import SequenceMatcher
16
 
17
  import torch
18
  import gradio as gr
19
- from transformers import AutoTokenizer, AutoModelForCausalLM
20
  from huggingface_hub import hf_hub_download, upload_file
21
 
22
  try:
@@ -29,7 +29,7 @@ except Exception:
29
  # =========================================================
30
 
31
  # --- MODELL ---
32
- MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
33
  # Beispiele:
34
  # "Qwen/Qwen3-0.6B" → sehr klein, schnell, schwächer
35
  # "Qwen/Qwen3-1.7B" → gute Balance (empfohlen)
@@ -50,6 +50,7 @@ os.makedirs(DATA_DIR, exist_ok=True)
50
  WISSEN_FILE = os.path.join(DATA_DIR, "wissen.json")
51
  CHAT_FILE = os.path.join(DATA_DIR, "chat_history.json")
52
  LOG_FILE = os.path.join(DATA_DIR, "ai_log.txt")
 
53
 
54
  # --- ANTWORTLÄNGE (wie lang darf die KI antworten?) ---
55
  MAX_NEW_TOKENS_CHAT = 80 # Normale Chat-Antwort
@@ -87,6 +88,7 @@ MAX_CONTEXT_TURNS = 2 # Wie viele Nachrichten als Kontext genutzt werden
87
  # --- KI-PERSÖNLICHKEIT ---
88
  AI_NAME = "RedJul2110"
89
  FALLBACK_NO_INFO = "Dazu habe ich gerade keine sichere Antwort."
 
90
 
91
  # --- WISSENSDATENBANK ---
92
  USE_QWEN_POLISH = True # True = KI verfeinert Antworten mit Wissen
@@ -103,6 +105,8 @@ DB_FACT_MATCH_THRESHOLD = 0.70 # Ab wann gilt ein Fakt als passend? (0.0–1.
103
  # =========================================================
104
  model = None
105
  tokenizer = None
 
 
106
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
107
 
108
  knowledge_lock = threading.Lock()
@@ -117,6 +121,42 @@ letzte_wissensänderung = None
117
  letzte_api_latenz = None
118
  letzter_fehler = None
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  # =========================================================
121
  # HILFSFUNKTIONEN
122
  # =========================================================
@@ -349,7 +389,47 @@ def upload_wissen_background():
349
  finally:
350
  upload_in_progress = False
351
 
352
- def db_match_score(query, item):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  q_norm = normalize_text(query)
354
  frage = normalize_text(item.get("frage", ""))
355
  antwort = item.get("antwort", "")
@@ -365,6 +445,11 @@ def db_match_score(query, item):
365
  c_tokens = text_tokens(blob_norm)
366
  token_score = len(q_tokens & c_tokens) / max(len(q_tokens), 1)
367
 
 
 
 
 
 
368
  bonus = 0.0
369
  if q_norm == frage:
370
  bonus += 0.35
@@ -375,7 +460,7 @@ def db_match_score(query, item):
375
  if kategorie and q_norm == kategorie:
376
  bonus += 0.08
377
 
378
- return (seq * 0.6) + (token_score * 0.4) + bonus
379
 
380
  def exact_db_answer(user_message):
381
  q = normalize_text(user_message)
@@ -404,9 +489,10 @@ def best_db_answer(user_message, threshold=DB_DIRECT_MATCH_THRESHOLD):
404
 
405
  best_item = None
406
  best_score = 0.0
 
407
 
408
  for item in data:
409
- score = db_match_score(user_message, item)
410
  if score > best_score:
411
  best_score = score
412
  best_item = item
@@ -422,8 +508,9 @@ def find_relevant_facts(query, max_items=6, min_score=DB_FACT_MATCH_THRESHOLD):
422
  return []
423
 
424
  scored = []
 
425
  for item in data:
426
- score = db_match_score(query, item)
427
  if score >= min_score:
428
  scored.append((score, item))
429
 
@@ -452,8 +539,9 @@ def search_knowledge(query, max_results=8):
452
  return "Keine Einträge vorhanden."
453
 
454
  scored = []
 
455
  for item in data:
456
- score = db_match_score(query, item)
457
  if score >= DB_FACT_MATCH_THRESHOLD:
458
  scored.append((score, item))
459
 
@@ -514,7 +602,7 @@ def delete_all_knowledge(admin_code):
514
  threading.Thread(target=upload_wissen_background, daemon=True).start()
515
  return True, "✅ Alle Wissenseinträge wurden gelöscht."
516
 
517
- def save_knowledge_entry(frage, antwort, kategorie="", quelle=""):
518
  global letzte_wissensänderung
519
 
520
  frage = (frage or "").strip()
@@ -535,11 +623,15 @@ def save_knowledge_entry(frage, antwort, kategorie="", quelle=""):
535
  if normalize_text(item.get("frage", "")) == q_norm:
536
  return False, "ℹ️ Dieser Eintrag ist schon vorhanden."
537
 
 
 
 
538
  entry = {
539
  "frage": frage,
540
  "antwort": antwort,
541
  "kategorie": kategorie,
542
  "quelle": quelle,
 
543
  "created_at": now_str()
544
  }
545
  data.append(entry)
@@ -714,10 +806,23 @@ def load_chat_history():
714
  def save_chat_history(history):
715
  save_json_list(CHAT_FILE, history)
716
 
 
 
 
 
 
 
 
 
 
 
 
 
 
717
  def reset_chat_history():
718
  global api_chat_historie
719
  with chat_lock:
720
- api_chat_historie = []
721
  save_chat_history(api_chat_historie)
722
  log_line("[CHAT] Chat-Historie zurückgesetzt.")
723
  return True, "✅ Chat-Historie gelöscht."
@@ -878,6 +983,49 @@ def model_generate(messages_history, max_new_tokens=120, temperature=0.6, do_sam
878
 
879
  return text
880
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
881
  def build_system_prompt(user_message=""):
882
  facts = find_relevant_facts(user_message, max_items=6)
883
  if not facts:
@@ -1003,29 +1151,114 @@ def generate_reply(user_message, history_context=""):
1003
 
1004
  # Falls keine Fakten da sind oder das Polieren Mist war: Normaler Chat
1005
  return general_chat_reply(user_message, history_context)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1006
  # =========================================================
1007
  # API
1008
  # =========================================================
1009
- def gradio_simple_api(user_message):
1010
  global api_chat_historie, letzte_api_latenz
1011
 
1012
  start = time.perf_counter()
 
1013
 
1014
  with chat_lock:
1015
- history_context = history_to_context(api_chat_historie)
 
1016
  reply = generate_reply(user_message, history_context=history_context)
1017
 
1018
- api_chat_historie.append({"role": "user", "content": user_message})
1019
- api_chat_historie.append({"role": "assistant", "content": reply})
1020
- trim_api_history(10)
 
 
1021
  save_chat_history(api_chat_historie)
1022
 
1023
- log_line(f"[USER] {user_message}")
1024
  log_line(f"[ASSISTANT] {reply}")
1025
 
1026
  letzte_api_latenz = f"{(time.perf_counter() - start) * 1000:.2f} ms"
1027
  return reply
1028
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1029
  # =========================================================
1030
  # UI FUNKTIONEN
1031
  # =========================================================
@@ -1066,11 +1299,65 @@ def ui_web_lernen(passwort, frage, antwort, kategorie):
1066
  ok, msg = save_knowledge_entry(frage, antwort, kategorie)
1067
  return msg
1068
 
1069
- def ui_link_lernen(passwort, url, thema, kategorie):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1070
  if passwort != ADMIN_CODE:
1071
  return "❌ Zugriff verweigert! Falscher Admin-Code."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1072
 
1073
- ok, msg = save_link_as_knowledge(url, thema, kategorie)
1074
  return msg
1075
 
1076
  def ui_wissen_suchen(suchbegriff):
@@ -1134,22 +1421,38 @@ def load_visible_chat_history_for_ui():
1134
  # APP
1135
  # =========================================================
1136
  def erzeuge_gradio_app():
1137
- with gr.Blocks(title="Privates KI Kontrollzentrum") as demo:
 
 
 
 
 
 
 
1138
  hidden_msg = gr.Textbox(value="", visible=False)
 
1139
  hidden_out = gr.Textbox(value="", visible=False)
1140
  api_trigger = gr.Button(visible=False)
1141
 
1142
  api_trigger.click(
1143
  gradio_simple_api,
1144
- inputs=[hidden_msg],
1145
  outputs=[hidden_out],
1146
  api_name="predict"
1147
  )
1148
 
 
 
 
 
 
 
 
 
1149
  gr.Markdown("# 🤖 Privates KI Kontrollzentrum")
1150
  gr.Markdown("Die KI nutzt zuerst ihre eigenen Antworten. Gefundene Fakten aus der Datenbank dienen nur als Zusatzwissen.")
1151
 
1152
- with gr.Tab("📊 Status"):
1153
  status_text = gr.Textbox(label="Systembericht", lines=16, interactive=False)
1154
  with gr.Row():
1155
  refresh_btn = gr.Button("Status aktualisieren")
@@ -1158,43 +1461,78 @@ def erzeuge_gradio_app():
1158
  sync_btn.click(ui_sync_wissen, outputs=status_text)
1159
  demo.load(ui_zeige_status, outputs=status_text)
1160
 
1161
- with gr.Tab("🧠 Lernen (Admin)"):
1162
- gr.Markdown("Hier speicherst du neue Fakten in die Datenbank.")
1163
- pw_input = gr.Textbox(label="Geheimer Code", type="password")
1164
- k_input = gr.Textbox(label="Kategorie / Bereich (optional)", placeholder="z. B. Geschichte, Geo, Technik")
1165
- q_input = gr.Textbox(label="Thema / Stichwort", placeholder="z. B. Frankreich, Mars, Bundeskanzler")
1166
- a_input = gr.Textbox(label="Text", placeholder="Langer Infotext", lines=6)
1167
- lern_btn = gr.Button("Wissen speichern", variant="primary")
1168
- lern_out = gr.Textbox(label="Ergebnis", interactive=False)
1169
- lern_btn.click(ui_web_lernen, inputs=[pw_input, q_input, a_input, k_input], outputs=lern_out)
1170
-
1171
- with gr.Tab("🌐 Link lernen"):
1172
- gr.Markdown("Ein öffentlicher Link wird ausgelesen, zusammengefasst und als Wissen gespeichert.")
1173
- link_pw = gr.Textbox(label="Geheimer Code", type="password")
1174
- link_url = gr.Textbox(label="Link", placeholder="https://...")
1175
- link_topic = gr.Textbox(label="Thema / Stichwort (optional)", placeholder="z. B. Minecraft, Deutschland, KI")
1176
- link_cat = gr.Textbox(label="Kategorie / Bereich (optional)", placeholder="z. B. web, geschichte, technik")
1177
- link_btn = gr.Button("Link lernen", variant="primary")
1178
- link_out = gr.Textbox(label="Ergebnis", lines=8, interactive=False)
1179
- link_btn.click(ui_link_lernen, inputs=[link_pw, link_url, link_topic, link_cat], outputs=link_out)
1180
-
1181
- with gr.Tab("🔍 Suchen / Löschen"):
1182
- gr.Markdown("Suche in der Datenbank oder lösche Einträge wieder.")
1183
- search_box = gr.Textbox(label="Suchbegriff", placeholder="z. B. Frankreich")
1184
- search_btn = gr.Button("Suchen")
1185
- search_out = gr.Textbox(label="Treffer", lines=12, interactive=False)
1186
-
1187
- del_pw = gr.Textbox(label="Admin-Code", type="password")
1188
- del_box = gr.Textbox(label="Löschen nach Begriff", placeholder="z. B. Frankreich")
1189
- del_btn = gr.Button("Löschen", variant="secondary")
1190
- del_out = gr.Textbox(label="Lösch-Ergebnis", interactive=False)
1191
-
1192
- all_del_btn = gr.Button("ALLES löschen", variant="stop")
1193
- all_del_out = gr.Textbox(label="Alles löschen", interactive=False)
1194
-
1195
- search_btn.click(ui_wissen_suchen, inputs=[search_box], outputs=search_out)
1196
- del_btn.click(ui_wissen_loeschen, inputs=[del_pw, del_box], outputs=del_out)
1197
- all_del_btn.click(ui_wissen_alle_loeschen, inputs=[del_pw], outputs=all_del_out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1198
 
1199
  demo.queue(default_concurrency_limit=8)
1200
  return demo
@@ -1222,6 +1560,8 @@ def bootstrap():
1222
  ensure_json_list_file(WISSEN_FILE)
1223
  ensure_json_list_file(CHAT_FILE)
1224
 
 
 
1225
  sync_wissen_from_hf()
1226
  api_chat_historie = load_chat_history()
1227
 
 
16
 
17
  import torch
18
  import gradio as gr
19
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, AutoModel
20
  from huggingface_hub import hf_hub_download, upload_file
21
 
22
  try:
 
29
  # =========================================================
30
 
31
  # --- MODELL ---
32
+ MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"
33
  # Beispiele:
34
  # "Qwen/Qwen3-0.6B" → sehr klein, schnell, schwächer
35
  # "Qwen/Qwen3-1.7B" → gute Balance (empfohlen)
 
50
  WISSEN_FILE = os.path.join(DATA_DIR, "wissen.json")
51
  CHAT_FILE = os.path.join(DATA_DIR, "chat_history.json")
52
  LOG_FILE = os.path.join(DATA_DIR, "ai_log.txt")
53
+ SETTINGS_FILE = os.path.join(DATA_DIR, "settings.json")
54
 
55
  # --- ANTWORTLÄNGE (wie lang darf die KI antworten?) ---
56
  MAX_NEW_TOKENS_CHAT = 80 # Normale Chat-Antwort
 
88
  # --- KI-PERSÖNLICHKEIT ---
89
  AI_NAME = "RedJul2110"
90
  FALLBACK_NO_INFO = "Dazu habe ich gerade keine sichere Antwort."
91
+ SYSTEM_PROMPT_ZUSATZ = "Du bist ein brillanter KI-Assistent. Du kannst sehr gut programmieren und Code-Beispiele liefern."
92
 
93
  # --- WISSENSDATENBANK ---
94
  USE_QWEN_POLISH = True # True = KI verfeinert Antworten mit Wissen
 
105
  # =========================================================
106
  model = None
107
  tokenizer = None
108
+ embed_model = None
109
+ embed_tokenizer = None
110
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
111
 
112
  knowledge_lock = threading.Lock()
 
121
  letzte_api_latenz = None
122
  letzter_fehler = None
123
 
124
+ def load_settings():
125
+ global TEMPERATURE_CHAT, MAX_NEW_TOKENS_CHAT, AI_NAME, SYSTEM_PROMPT_ZUSATZ
126
+ if os.path.exists(SETTINGS_FILE):
127
+ try:
128
+ with open(SETTINGS_FILE, "r", encoding="utf-8") as f:
129
+ s = json.load(f)
130
+ TEMPERATURE_CHAT = s.get("temperature", TEMPERATURE_CHAT)
131
+ MAX_NEW_TOKENS_CHAT = s.get("max_tokens", MAX_NEW_TOKENS_CHAT)
132
+ AI_NAME = s.get("ai_name", AI_NAME)
133
+ SYSTEM_PROMPT_ZUSATZ = s.get("system_prompt", SYSTEM_PROMPT_ZUSATZ)
134
+ except Exception:
135
+ pass
136
+
137
+ def save_settings(pw, t, m, n, p):
138
+ global TEMPERATURE_CHAT, MAX_NEW_TOKENS_CHAT, AI_NAME, SYSTEM_PROMPT_ZUSATZ
139
+ if pw != ADMIN_CODE:
140
+ return "❌ Falscher Admin-Code."
141
+
142
+ try:
143
+ TEMPERATURE_CHAT = float(t)
144
+ MAX_NEW_TOKENS_CHAT = int(m)
145
+ AI_NAME = str(n).strip()
146
+ SYSTEM_PROMPT_ZUSATZ = str(p).strip()
147
+
148
+ s = {
149
+ "temperature": TEMPERATURE_CHAT,
150
+ "max_tokens": MAX_NEW_TOKENS_CHAT,
151
+ "ai_name": AI_NAME,
152
+ "system_prompt": SYSTEM_PROMPT_ZUSATZ
153
+ }
154
+ with open(SETTINGS_FILE, "w", encoding="utf-8") as f:
155
+ json.dump(s, f, indent=2)
156
+ return "✅ Einstellungen erfolgreich gespeichert!"
157
+ except Exception as e:
158
+ return f"❌ Fehler beim Speichern: {e}"
159
+
160
  # =========================================================
161
  # HILFSFUNKTIONEN
162
  # =========================================================
 
389
  finally:
390
  upload_in_progress = False
391
 
392
+ def init_embed_model():
393
+ global embed_tokenizer, embed_model
394
+ if embed_model is not None:
395
+ return
396
+ try:
397
+ log_line("[INFO] Lade Embedding-Modell für semantische Suche...")
398
+ embed_tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
399
+ embed_model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2").to(device)
400
+ embed_model.eval()
401
+ log_line("[INFO] Embedding-Modell erfolgreich geladen.")
402
+ except Exception as e:
403
+ log_error("init_embed_model", e)
404
+
405
+ def get_embedding(text):
406
+ init_embed_model()
407
+ if embed_model is None or embed_tokenizer is None or not text:
408
+ return []
409
+
410
+ try:
411
+ inputs = embed_tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
412
+ with torch.no_grad():
413
+ outputs = embed_model(**inputs)
414
+ # Mean Pooling
415
+ emb = outputs.last_hidden_state.mean(dim=1)
416
+ return emb[0].cpu().numpy().tolist()
417
+ except Exception as e:
418
+ log_error("get_embedding", e)
419
+ return []
420
+
421
def cosine_similarity(v1, v2):
    """Cosine similarity of two equal-length vectors.

    Returns 0.0 for empty vectors, mismatched lengths, or zero magnitude.
    """
    if not v1 or not v2 or len(v1) != len(v2):
        return 0.0
    dot = 0.0
    sq_a = 0.0
    sq_b = 0.0
    for a, b in zip(v1, v2):
        dot += a * b
        sq_a += a * a
        sq_b += b * b
    mag1 = sq_a ** 0.5
    mag2 = sq_b ** 0.5
    if mag1 == 0 or mag2 == 0:
        return 0.0
    return dot / (mag1 * mag2)
431
+
432
+ def db_match_score(query, item, query_emb=None):
433
  q_norm = normalize_text(query)
434
  frage = normalize_text(item.get("frage", ""))
435
  antwort = item.get("antwort", "")
 
445
  c_tokens = text_tokens(blob_norm)
446
  token_score = len(q_tokens & c_tokens) / max(len(q_tokens), 1)
447
 
448
+ semantic_score = 0.0
449
+ item_emb = item.get("embedding", [])
450
+ if query_emb and item_emb:
451
+ semantic_score = cosine_similarity(query_emb, item_emb)
452
+
453
  bonus = 0.0
454
  if q_norm == frage:
455
  bonus += 0.35
 
460
  if kategorie and q_norm == kategorie:
461
  bonus += 0.08
462
 
463
+ return max((seq * 0.6) + (token_score * 0.4) + bonus, semantic_score + bonus)
464
 
465
  def exact_db_answer(user_message):
466
  q = normalize_text(user_message)
 
489
 
490
  best_item = None
491
  best_score = 0.0
492
+ query_emb = get_embedding(user_message)
493
 
494
  for item in data:
495
+ score = db_match_score(user_message, item, query_emb=query_emb)
496
  if score > best_score:
497
  best_score = score
498
  best_item = item
 
508
  return []
509
 
510
  scored = []
511
+ query_emb = get_embedding(query)
512
  for item in data:
513
+ score = db_match_score(query, item, query_emb=query_emb)
514
  if score >= min_score:
515
  scored.append((score, item))
516
 
 
539
  return "Keine Einträge vorhanden."
540
 
541
  scored = []
542
+ query_emb = get_embedding(query)
543
  for item in data:
544
+ score = db_match_score(query, item, query_emb=query_emb)
545
  if score >= DB_FACT_MATCH_THRESHOLD:
546
  scored.append((score, item))
547
 
 
602
  threading.Thread(target=upload_wissen_background, daemon=True).start()
603
  return True, "✅ Alle Wissenseinträge wurden gelöscht."
604
 
605
+ def save_knowledge_entry(frage, antwort, kategorie="", quelle="", embedding=None):
606
  global letzte_wissensänderung
607
 
608
  frage = (frage or "").strip()
 
623
  if normalize_text(item.get("frage", "")) == q_norm:
624
  return False, "ℹ️ Dieser Eintrag ist schon vorhanden."
625
 
626
+ if embedding is None:
627
+ embedding = get_embedding(frage)
628
+
629
  entry = {
630
  "frage": frage,
631
  "antwort": antwort,
632
  "kategorie": kategorie,
633
  "quelle": quelle,
634
+ "embedding": embedding,
635
  "created_at": now_str()
636
  }
637
  data.append(entry)
 
806
  def save_chat_history(history):
807
  save_json_list(CHAT_FILE, history)
808
 
809
+ def get_chat_session(session_id="default"):
810
+ global api_chat_historie
811
+ if not isinstance(api_chat_historie, dict):
812
+ if isinstance(api_chat_historie, list):
813
+ api_chat_historie = {"default": api_chat_historie}
814
+ else:
815
+ api_chat_historie = {"default": []}
816
+
817
+ if session_id not in api_chat_historie:
818
+ api_chat_historie[session_id] = []
819
+
820
+ return api_chat_historie[session_id]
821
+
822
  def reset_chat_history():
823
  global api_chat_historie
824
  with chat_lock:
825
+ api_chat_historie = {"default": []}
826
  save_chat_history(api_chat_historie)
827
  log_line("[CHAT] Chat-Historie zurückgesetzt.")
828
  return True, "✅ Chat-Historie gelöscht."
 
983
 
984
  return text
985
 
986
+ def model_generate_stream(messages_history, max_new_tokens=120, temperature=0.6, do_sample=True):
987
+ if model is None or tokenizer is None:
988
+ yield "Modell nicht geladen."
989
+ return
990
+
991
+ prompt_text = format_messages_for_model(messages_history)
992
+ if not prompt_text:
993
+ return
994
+
995
+ inputs = tokenizer(
996
+ [prompt_text],
997
+ return_tensors="pt",
998
+ truncation=True,
999
+ max_length=2048
1000
+ ).to(device)
1001
+
1002
+ attention_mask = inputs.get("attention_mask", None)
1003
+ streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
1004
+
1005
+ generation_kwargs = {
1006
+ "input_ids": inputs.input_ids,
1007
+ "attention_mask": attention_mask,
1008
+ "max_new_tokens": max_new_tokens,
1009
+ "do_sample": do_sample,
1010
+ "temperature": temperature,
1011
+ "top_p": 0.90,
1012
+ "top_k": 40,
1013
+ "repetition_penalty": REPETITION_PENALTY,
1014
+ "no_repeat_ngram_size": NO_REPEAT_NGRAM_SIZE,
1015
+ "pad_token_id": tokenizer.eos_token_id,
1016
+ "eos_token_id": tokenizer.eos_token_id,
1017
+ "streamer": streamer
1018
+ }
1019
+
1020
+ thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
1021
+ thread.start()
1022
+
1023
+ generated_text = ""
1024
+ for new_text in streamer:
1025
+ generated_text += new_text
1026
+ yield generated_text.strip()
1027
+
1028
+
1029
  def build_system_prompt(user_message=""):
1030
  facts = find_relevant_facts(user_message, max_items=6)
1031
  if not facts:
 
1151
 
1152
  # Falls keine Fakten da sind oder das Polieren Mist war: Normaler Chat
1153
  return general_chat_reply(user_message, history_context)
1154
+
1155
+ def general_chat_reply_stream(user_message, history_context=""):
1156
+ if model is None or tokenizer is None:
1157
+ yield "Dazu habe ich gerade keine sichere Antwort."
1158
+ return
1159
+
1160
+ messages = [
1161
+ {
1162
+ "role": "system",
1163
+ "content": f"Du bist {AI_NAME}. Antworte immer auf Deutsch. Kurz, direkt, hilfreich. Keine Floskeln. Bei Unsicherheit: 'Ich bin nicht sicher, aber ich glaube...'"
1164
+ },
1165
+ {
1166
+ "role": "user",
1167
+ "content": user_message
1168
+ }
1169
+ ]
1170
+
1171
+ try:
1172
+ for chunk in model_generate_stream(messages, max_new_tokens=MAX_NEW_TOKENS_CHAT, temperature=TEMPERATURE_CHAT, do_sample=True):
1173
+ yield chunk or "Dazu habe ich gerade keine sichere Antwort."
1174
+ except Exception as e:
1175
+ log_error("general_chat_reply_stream", e)
1176
+ yield "Dazu habe ich gerade keine sichere Antwort."
1177
+
1178
+ def generate_reply_stream(user_message, history_context=""):
1179
+ query = f"{user_message} {history_context}".strip()
1180
+
1181
+ facts = find_relevant_facts(query, max_items=6)
1182
+ facts = dedupe_facts(facts)
1183
+
1184
+ exact = exact_db_answer(user_message)
1185
+ if exact and not is_generic_or_placeholder_answer(exact):
1186
+ extra_fact = {
1187
+ "frage": user_message,
1188
+ "antwort": exact,
1189
+ "kategorie": "",
1190
+ "quelle": "",
1191
+ "created_at": ""
1192
+ }
1193
+ facts = dedupe_facts([extra_fact] + facts)
1194
+
1195
+ draft = compose_draft_from_facts(facts)
1196
+ if facts and len(facts) > 0:
1197
+ reply = polish_with_model(user_message, draft, facts, history_context)
1198
+ if reply and not is_generic_or_placeholder_answer(reply):
1199
+ yield reply
1200
+ return
1201
+
1202
+ # Normaler Chat Stream
1203
+ for chunk in general_chat_reply_stream(user_message, history_context):
1204
+ yield chunk
1205
  # =========================================================
1206
  # API
1207
  # =========================================================
1208
+ def gradio_simple_api(user_message, session_id="default"):
1209
  global api_chat_historie, letzte_api_latenz
1210
 
1211
  start = time.perf_counter()
1212
+ session_id = session_id or "default"
1213
 
1214
  with chat_lock:
1215
+ history = get_chat_session(session_id)
1216
+ history_context = history_to_context(history)
1217
  reply = generate_reply(user_message, history_context=history_context)
1218
 
1219
+ history.append({"role": "user", "content": user_message})
1220
+ history.append({"role": "assistant", "content": reply})
1221
+ if len(history) > MAX_CHAT_HISTORY:
1222
+ api_chat_historie[session_id] = history[-MAX_CHAT_HISTORY:]
1223
+
1224
  save_chat_history(api_chat_historie)
1225
 
1226
+ log_line(f"[USER] {user_message} (Session: {session_id})")
1227
  log_line(f"[ASSISTANT] {reply}")
1228
 
1229
  letzte_api_latenz = f"{(time.perf_counter() - start) * 1000:.2f} ms"
1230
  return reply
1231
 
1232
+ def gradio_stream_api(user_message, session_id="default"):
1233
+ global api_chat_historie, letzte_api_latenz
1234
+
1235
+ start = time.perf_counter()
1236
+ session_id = session_id or "default"
1237
+
1238
+ with chat_lock:
1239
+ history = get_chat_session(session_id)
1240
+ history_context = history_to_context(history)
1241
+
1242
+ reply = ""
1243
+ for chunk in generate_reply_stream(user_message, history_context=history_context):
1244
+ reply = chunk
1245
+ yield reply
1246
+
1247
+ with chat_lock:
1248
+ history = get_chat_session(session_id)
1249
+ history.append({"role": "user", "content": user_message})
1250
+ history.append({"role": "assistant", "content": reply})
1251
+ if len(history) > MAX_CHAT_HISTORY:
1252
+ api_chat_historie[session_id] = history[-MAX_CHAT_HISTORY:]
1253
+
1254
+ save_chat_history(api_chat_historie)
1255
+
1256
+ log_line(f"[USER] {user_message} (Session: {session_id})")
1257
+ log_line(f"[ASSISTANT] {reply}")
1258
+
1259
+ letzte_api_latenz = f"{(time.perf_counter() - start) * 1000:.2f} ms"
1260
+
1261
+
1262
  # =========================================================
1263
  # UI FUNKTIONEN
1264
  # =========================================================
 
1299
  ok, msg = save_knowledge_entry(frage, antwort, kategorie)
1300
  return msg
1301
 
1302
+ def ui_link_lernen_multi(passwort, urls_text, thema, kategorie):
1303
+ if passwort != ADMIN_CODE:
1304
+ return "❌ Zugriff verweigert! Falscher Admin-Code."
1305
+
1306
+ urls = [u.strip() for u in urls_text.replace(",", "\n").split("\n") if u.strip()]
1307
+ if not urls:
1308
+ return "❌ Keine gültigen URLs gefunden."
1309
+
1310
+ results = []
1311
+ for u in urls:
1312
+ ok, msg = save_link_as_knowledge(u, thema, kategorie)
1313
+ results.append(f"[{u}]: {msg}")
1314
+
1315
+ return "\n\n".join(results)
1316
+
1317
def extract_pdf_text_fallback(filepath):
    """Extract all page text from a PDF via the optional PyPDF2 dependency.

    Returns the stripped text, the sentinel "ERROR_NO_PYPDF2" when PyPDF2 is
    not installed, or "ERROR_READ: ..." on any other failure.
    """
    try:
        import PyPDF2
        parts = []
        with open(filepath, "rb") as fh:
            for page in PyPDF2.PdfReader(fh).pages:
                extracted = page.extract_text()
                if extracted:
                    parts.append(extracted)
                    parts.append("\n")
        return "".join(parts).strip()
    except ImportError:
        return "ERROR_NO_PYPDF2"
    except Exception as e:
        return f"ERROR_READ: {e}"
1332
+
1333
+ def ui_pdf_lernen(passwort, file_obj, thema, kategorie):
1334
  if passwort != ADMIN_CODE:
1335
  return "❌ Zugriff verweigert! Falscher Admin-Code."
1336
+
1337
+ if file_obj is None:
1338
+ return "❌ Bitte eine Datei hochladen."
1339
+
1340
+ filepath = file_obj if type(file_obj) is str else getattr(file_obj, "name", None)
1341
+ if not filepath:
1342
+ return "❌ Dateipfad konnte nicht ermittelt werden."
1343
+
1344
+ text = extract_pdf_text_fallback(filepath)
1345
+ if text == "ERROR_NO_PYPDF2":
1346
+ return "❌ Das Paket 'PyPDF2' fehlt. Bitte füge 'PyPDF2' zu deiner 'requirements.txt' in Hugging Face hinzu!"
1347
+ elif text.startswith("ERROR_READ:"):
1348
+ return f"❌ Fehler beim Lesen: {text}"
1349
+
1350
+ if len(text) < 50:
1351
+ return "❌ In der PDF konnte kein/kaum Text gefunden werden."
1352
+
1353
+ topic = thema or "PDF Dokument"
1354
+ cat = kategorie or "dokument"
1355
+
1356
+ summary = summarize_web_text(topic, text)
1357
+ if not summary or len(summary.strip()) < 30:
1358
+ summary = heuristic_summary(text, max_sentences=6, max_chars=3000)
1359
 
1360
+ ok, msg = save_knowledge_entry(frage=topic, antwort=summary, kategorie=cat, quelle="PDF Upload")
1361
  return msg
1362
 
1363
  def ui_wissen_suchen(suchbegriff):
 
1421
  # APP
1422
  # =========================================================
1423
  def erzeuge_gradio_app():
1424
+ custom_css = """
1425
+ body { background: linear-gradient(135deg, #0f2027, #203a43, #2c5364); color: #fff; font-family: 'Inter', sans-serif; }
1426
+ .gradio-container { background: rgba(255, 255, 255, 0.05); backdrop-filter: blur(15px); border-radius: 12px; border: 1px solid rgba(255,255,255,0.1); box-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.37); }
1427
+ button.primary { background: linear-gradient(90deg, #00C9FF 0%, #92FE9D 100%); border: none; color: black; font-weight: bold; }
1428
+ button.primary:hover { transform: translateY(-2px); box-shadow: 0 5px 15px rgba(0,201,255,0.4); }
1429
+ """
1430
+
1431
+ with gr.Blocks(title="Privates KI Kontrollzentrum", theme=gr.themes.Soft(), css=custom_css) as demo:
1432
  hidden_msg = gr.Textbox(value="", visible=False)
1433
+ hidden_session = gr.Textbox(value="default", visible=False)
1434
  hidden_out = gr.Textbox(value="", visible=False)
1435
  api_trigger = gr.Button(visible=False)
1436
 
1437
  api_trigger.click(
1438
  gradio_simple_api,
1439
+ inputs=[hidden_msg, hidden_session],
1440
  outputs=[hidden_out],
1441
  api_name="predict"
1442
  )
1443
 
1444
+ api_trigger_stream = gr.Button(visible=False)
1445
+ api_trigger_stream.click(
1446
+ gradio_stream_api,
1447
+ inputs=[hidden_msg, hidden_session],
1448
+ outputs=[hidden_out],
1449
+ api_name="stream"
1450
+ )
1451
+
1452
  gr.Markdown("# 🤖 Privates KI Kontrollzentrum")
1453
  gr.Markdown("Die KI nutzt zuerst ihre eigenen Antworten. Gefundene Fakten aus der Datenbank dienen nur als Zusatzwissen.")
1454
 
1455
+ with gr.Tab("📊 System Status"):
1456
  status_text = gr.Textbox(label="Systembericht", lines=16, interactive=False)
1457
  with gr.Row():
1458
  refresh_btn = gr.Button("Status aktualisieren")
 
1461
  sync_btn.click(ui_sync_wissen, outputs=status_text)
1462
  demo.load(ui_zeige_status, outputs=status_text)
1463
 
1464
+ with gr.Tab("🔒 Admin-Bereich"):
1465
+ login_col = gr.Column(visible=True)
1466
+ admin_col = gr.Column(visible=False)
1467
+
1468
+ with login_col:
1469
+ gr.Markdown("### Bitte Admin-Code eingeben, um Einstellungen und Lern-Tools freizuschalten.")
1470
+ admin_pw = gr.Textbox(label="Admin-Code", type="password")
1471
+ login_btn = gr.Button("Login", variant="primary")
1472
+ login_err = gr.Markdown(visible=False)
1473
+
1474
+ with admin_col:
1475
+ with gr.Tabs():
1476
+ with gr.Tab("🧠 Wissen lernen"):
1477
+ gr.Markdown("Speichere neue Fakten direkt in die Datenbank.")
1478
+ q_input = gr.Textbox(label="Thema / Stichwort", placeholder="z. B. Frankreich, Mars")
1479
+ a_input = gr.Textbox(label="Text", placeholder="Langer Infotext", lines=6)
1480
+ k_input = gr.Textbox(label="Kategorie / Bereich (optional)", placeholder="z. B. Geschichte")
1481
+ lern_btn = gr.Button("Wissen speichern", variant="primary")
1482
+ lern_out = gr.Textbox(label="Ergebnis", interactive=False)
1483
+ lern_btn.click(lambda q, a, k: ui_web_lernen(ADMIN_CODE, q, a, k), inputs=[q_input, a_input, k_input], outputs=lern_out)
1484
+
1485
+ with gr.Tab("🌐 Link lernen (Multi)"):
1486
+ gr.Markdown("Ein oder mehrere öffentliche Links einfügen (durch neue Zeile getrennt). Die KI liest und lernt diese.")
1487
+ link_urls = gr.Textbox(label="Links (Eine URL pro Zeile)", placeholder="https://...\nhttps://...", lines=5)
1488
+ link_topic = gr.Textbox(label="Thema / Stichwort (optional)")
1489
+ link_cat = gr.Textbox(label="Kategorie / Bereich (optional)")
1490
+ link_btn = gr.Button("Links lernen", variant="primary")
1491
+ link_out = gr.Textbox(label="Ergebnis", lines=8, interactive=False)
1492
+ link_btn.click(lambda u, t, c: ui_link_lernen_multi(ADMIN_CODE, u, t, c), inputs=[link_urls, link_topic, link_cat], outputs=link_out)
1493
+
1494
+ with gr.Tab("📄 PDF lernen"):
1495
+ gr.Markdown("Lade eine PDF-Datei hoch, um ihren Text zu analysieren und als Wissen zu speichern.")
1496
+ pdf_file = gr.File(label="PDF Datei", file_types=[".pdf"])
1497
+ pdf_topic = gr.Textbox(label="Thema / Stichwort (optional)")
1498
+ pdf_cat = gr.Textbox(label="Kategorie / Bereich (optional)")
1499
+ pdf_btn = gr.Button("Dokument lernen", variant="primary")
1500
+ pdf_out = gr.Textbox(label="Ergebnis", lines=6, interactive=False)
1501
+ pdf_btn.click(lambda f, t, c: ui_pdf_lernen(ADMIN_CODE, f, t, c), inputs=[pdf_file, pdf_topic, pdf_cat], outputs=pdf_out)
1502
+
1503
+ with gr.Tab("🔍 Suchen / Löschen"):
1504
+ gr.Markdown("Suche in der Datenbank oder lösche Einträge wieder.")
1505
+ search_box = gr.Textbox(label="Suchbegriff", placeholder="z. B. Frankreich")
1506
+ search_btn = gr.Button("Suchen")
1507
+ search_out = gr.Textbox(label="Treffer", lines=12, interactive=False)
1508
+
1509
+ del_box = gr.Textbox(label="Löschen nach Begriff", placeholder="z. B. Frankreich")
1510
+ del_btn = gr.Button("Löschen", variant="secondary")
1511
+ del_out = gr.Textbox(label="Lösch-Ergebnis", interactive=False)
1512
+
1513
+ all_del_btn = gr.Button("ALLES löschen", variant="stop")
1514
+ all_del_out = gr.Textbox(label="Alles löschen", interactive=False)
1515
+
1516
+ search_btn.click(ui_wissen_suchen, inputs=[search_box], outputs=search_out)
1517
+ del_btn.click(lambda d: ui_wissen_loeschen(ADMIN_CODE, d), inputs=[del_box], outputs=del_out)
1518
+ all_del_btn.click(lambda: ui_wissen_alle_loeschen(ADMIN_CODE), inputs=[], outputs=all_del_out)
1519
+
1520
+ with gr.Tab("⚙️ Einstellungen"):
1521
+ gr.Markdown("Konfiguriere das Verhalten der KI direkt ohne Neustart/Terminal.")
1522
+ set_temp = gr.Slider(0.01, 2.0, value=float(TEMPERATURE_CHAT), label="Kreativität (Temperature)")
1523
+ set_max = gr.Slider(10, 2048, step=1, value=int(MAX_NEW_TOKENS_CHAT), label="Max Antwort-Länge (Tokens)")
1524
+ set_name = gr.Textbox(value=str(AI_NAME), label="KI Name")
1525
+ set_prompt = gr.Textbox(value=str(SYSTEM_PROMPT_ZUSATZ), label="System Prompt Zusatz", lines=3)
1526
+ set_btn = gr.Button("Speichern", variant="primary")
1527
+ set_out = gr.Textbox(label="Ergebnis", interactive=False)
1528
+ set_btn.click(lambda t, m, n, p: save_settings(ADMIN_CODE, t, m, n, p), inputs=[set_temp, set_max, set_name, set_prompt], outputs=set_out)
1529
+
1530
+ def do_login(pw):
1531
+ if pw == ADMIN_CODE:
1532
+ return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
1533
+ return gr.update(visible=True), gr.update(visible=False), gr.update(value="**❌ Falscher Admin-Code**", visible=True)
1534
+
1535
+ login_btn.click(do_login, inputs=[admin_pw], outputs=[login_col, admin_col, login_err])
1536
 
1537
  demo.queue(default_concurrency_limit=8)
1538
  return demo
 
1560
  ensure_json_list_file(WISSEN_FILE)
1561
  ensure_json_list_file(CHAT_FILE)
1562
 
1563
+ load_settings()
1564
+
1565
  sync_wissen_from_hf()
1566
  api_chat_historie = load_chat_history()
1567