martinbrahm committed on
Commit
3e87852
·
verified ·
1 Parent(s): db10d88

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +87 -66
main.py CHANGED
@@ -11,7 +11,7 @@ app = FastAPI()
11
  # Name of the collection whose documents reload_knowledge() streams into the cache.
  COLLECTION_KNOWLEDGE = "knowledge_base"
12
  # Name of the collection that receives questions the search could not answer.
  COLLECTION_INBOX = "inbox"
13
 
14
- # --- GLOBAL VARIABLE (the "turbo" store: in-memory copy of the knowledge documents) ---
15
  KNOWLEDGE_CACHE = []
16
 
17
  # --- FIREBASE CONNECTION ---
@@ -30,119 +30,140 @@ try:
30
  except Exception as e:
31
  print(f"❌ FEHLER beim Start: {e}")
32
 
33
# --- HELPER: LOAD THE DATA INTO RAM ---
def reload_knowledge():
    """
    Replace KNOWLEDGE_CACHE with a fresh copy of the knowledge collection.

    Every document is stored as its dict form plus an "id" key holding the
    document id (used in log output). The global is only rebound after the
    whole collection has been read, so a failed reload keeps the old cache.

    Returns:
        The number of cached documents, or 0 when no database handle is
        available or loading raised an exception.
    """
    global KNOWLEDGE_CACHE
    if not db:
        return 0

    print("🔄 Lade Wissensdatenbank in den Arbeitsspeicher...")
    try:
        fresh = []
        for snapshot in db.collection(COLLECTION_KNOWLEDGE).stream():
            fresh.append(dict(snapshot.to_dict(), id=snapshot.id))

        KNOWLEDGE_CACHE = fresh
        loaded = len(KNOWLEDGE_CACHE)
        print(f"🚀 TURBO-MODE: {loaded} Dokumente im RAM bereit!")
        return loaded
    except Exception as e:
        print(f"❌ Fehler beim Laden des Caches: {e}")
        return 0
54
 
55
# --- STARTUP EVENT (loads the data as soon as the app starts) ---
@app.on_event("startup")
async def startup_event():
    """Fill the in-memory knowledge cache once when the application starts."""
    reload_knowledge()
59
 
60
  # --- ENDPOINTS ---
61
-
62
  @app.get("/")
63
  def home():
64
- return {
65
- "status": "Turbo-Agent ist bereit.",
66
- "cached_docs": len(KNOWLEDGE_CACHE),
67
- "info": "Nutze /refresh_knowledge um neue Daten zu laden."
68
- }
69
 
70
  @app.get("/refresh_knowledge")
71
  def refresh_endpoint():
72
  count = reload_knowledge()
73
  return {"status": "Cache aktualisiert", "docs_loaded": count}
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  @app.post("/search")
76
  async def search_knowledge(request: Request):
77
- # 1. FRAGE EMPFANGEN
78
  try:
79
  data = await request.json()
80
  except:
81
  return {"result": "Fehler: Kein JSON."}
82
 
83
- # Frage extrahieren (Vapi/Retell kompatibel)
84
  query_text = ""
85
- if "query" in data:
86
- query_text = data["query"]
87
- elif "message" in data and "toolCalls" in data["message"]:
88
  try:
89
  args = data["message"]["toolCalls"][0]["function"]["arguments"]
90
  query_text = json.loads(args).get("query", "") if isinstance(args, str) else args.get("query", "")
91
  except: pass
92
-
93
- # Retell AI spezifisch (falls Retell die Frage anders schickt)
94
- if not query_text and "args" in data:
95
- query_text = data["args"].get("query", "")
96
 
97
- print(f"🔎 TURBO-SEARCH: '{query_text}'")
98
 
99
- if not query_text:
100
- return {"result": "Ich habe die Frage akustisch nicht verstanden."}
101
 
102
- # 2. SUCHEN IM RAM (Rasend schnell!)
103
- antwort = "Dazu habe ich leider keine Informationen in meiner Datenbank. Ich habe die Frage für das Team notiert."
104
- treffer = False
105
 
106
- query_lower = query_text.lower()
 
 
107
 
108
- # Wir iterieren durch die Liste im Speicher, nicht durch die Datenbank!
109
- for entry in KNOWLEDGE_CACHE:
110
- # Felder sicher auslesen
111
- t_answer = entry.get("answer") or entry.get("Antwort") or entry.get("content")
112
- t_keywords = entry.get("keywords") or entry.get("Keywords") or []
113
- t_question = entry.get("question") or entry.get("Frage") or ""
114
 
115
- if not t_answer:
116
- continue
 
 
 
 
 
 
 
 
 
117
 
118
- # A) Keyword-Match
119
  if isinstance(t_keywords, list):
120
- if any(k.lower() in query_lower for k in t_keywords):
121
- antwort = t_answer
122
- treffer = True
123
- print(f"✅ TREFFER (Keyword) in Doc {entry.get('id')}")
124
- break
 
 
 
125
 
126
- # B) Frage-Match
127
- if t_question and (t_question.lower() in query_lower or query_lower in t_question.lower()):
128
- antwort = t_answer
129
- treffer = True
130
- print(f"✅ TREFFER (Frage-Match) in Doc {entry.get('id')}")
131
- break
132
-
133
- # 3. LERN-LOGIK (Nur schreiben, wenn nichts gefunden)
134
- if not treffer:
135
- print(f"⚠️ KEIN TREFFER. Schreibe in '{COLLECTION_INBOX}' (DB Write)...")
136
- # Das Schreiben passiert im Hintergrund, bremst die Antwort kaum
137
- try:
138
- if db:
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  db.collection(COLLECTION_INBOX).add({
140
  "question": query_text,
141
  "status": "open",
142
- "timestamp": firestore.SERVER_TIMESTAMP,
143
- "source": "AI Call"
144
  })
145
- except Exception as e:
146
- print(f" Fehler Inbox: {e}")
147
-
148
- return {"result": antwort}
 
11
  # Name of the collection whose documents reload_knowledge() streams into the cache.
  COLLECTION_KNOWLEDGE = "knowledge_base"
12
  # Name of the collection that receives questions the search could not answer.
  COLLECTION_INBOX = "inbox"
13
 
14
+ # --- GLOBAL VARIABLE (RAM): in-memory copy of the knowledge documents ---
15
  KNOWLEDGE_CACHE = []
16
 
17
  # --- FIREBASE CONNECTION ---
 
30
  except Exception as e:
31
  print(f"❌ FEHLER beim Start: {e}")
32
 
33
# --- LOADING ---
def reload_knowledge():
    """
    Rebuild KNOWLEDGE_CACHE from the knowledge collection.

    Each cached item is the document's dict form with the document id added
    under "id". The global is rebound only after the full read succeeded.

    Returns:
        Number of documents now cached; 0 if there is no database handle or
        the read failed.
    """
    global KNOWLEDGE_CACHE
    if not db:
        return 0
    print("🔄 Lade Wissensdatenbank...")
    try:
        KNOWLEDGE_CACHE = [
            {**snapshot.to_dict(), "id": snapshot.id}
            for snapshot in db.collection(COLLECTION_KNOWLEDGE).stream()
        ]
        print(f"🚀 TURBO-MODE: {len(KNOWLEDGE_CACHE)} Dokumente im RAM!")
        return len(KNOWLEDGE_CACHE)
    except Exception as e:
        print(f"❌ Fehler Cache: {e}")
        return 0
51
 
 
52
  @app.on_event("startup")
53
  async def startup_event():
54
  reload_knowledge()
55
 
56
  # --- ENDPOINTS ---
 
57
  @app.get("/")
58
  def home():
59
+ return {"status": "Udo Agent API (Stemming Mode) ist bereit.", "docs": len(KNOWLEDGE_CACHE)}
 
 
 
 
60
 
61
  @app.get("/refresh_knowledge")
62
  def refresh_endpoint():
63
  count = reload_knowledge()
64
  return {"status": "Cache aktualisiert", "docs_loaded": count}
65
 
66
# --- 🧠 HELPER: GERMAN WORD STEMS ---
def get_stem(word):
    """
    Reduce a German word to a crude stem by cutting off one common suffix.

    The word is lower-cased and stripped of surrounding whitespace, then the
    first suffix from a fixed list that fits is removed. A suffix only fits
    if at least three characters remain afterwards, so short words such as
    'des' are returned unchanged.

    Examples: 'Preise' -> 'preis', 'kostet' -> 'kost', 'kosten' -> 'kost'.

    Args:
        word: The word to stem. Non-string values are converted with str();
            None yields an empty string.

    Returns:
        The lower-cased stem.
    """
    if not isinstance(word, str):
        word = "" if word is None else str(word)
    w = word.lower().strip()

    # Order matters: longer suffixes are tried before the single letters.
    # "et" is listed so that verb forms like 'kostet' share the stem of
    # 'kosten'; without it only the final 't' was removed ('koste').
    endings = ("ern", "em", "er", "en", "es", "et", "st", "te", "e", "s", "t")

    for end in endings:
        # Keep at least three characters so the stem stays meaningful.
        if w.endswith(end) and len(w) > (len(end) + 2):
            return w[:-len(end)]
    return w
81
+
82
# --- 🧠 THE NEW SMART SEARCH ---

# Punctuation removed from queries and stored questions before word splitting.
_SEARCH_PUNCTUATION = str.maketrans("", "", "?.,!;:")

# Keyword stems that score low because they appear in almost every document.
_LOW_VALUE_STEMS = frozenset({"udo", "capaneo"})


def _extract_query(data):
    """Return the question from the supported payload shapes, or "" if absent."""
    if not isinstance(data, dict):
        return ""

    query = ""
    if "query" in data:
        query = data["query"]
    elif "message" in data:
        try:
            args = data["message"]["toolCalls"][0]["function"]["arguments"]
            if isinstance(args, str):
                args = json.loads(args)
            query = args.get("query", "")
        except (KeyError, IndexError, TypeError, ValueError, AttributeError):
            # Malformed tool call: fall through to the "args" shape.
            query = ""

    fallback_args = data.get("args")
    if not query and isinstance(fallback_args, dict):
        query = fallback_args.get("query", "")

    if isinstance(query, str):
        return query
    return str(query) if query else ""


def _text_stems(text):
    """Split text into words longer than two characters and stem each one."""
    words = text.lower().translate(_SEARCH_PUNCTUATION).split()
    return [get_stem(w) for w in words if len(w) > 2]


def _score_entry(entry, query_stems):
    """Score one cache entry: 20 (or 2) per keyword hit, 15 per question word hit."""
    score = 0

    # A) Keyword hits. Non-string keywords are ignored instead of crashing.
    keywords = entry.get("keywords", [])
    if isinstance(keywords, list):
        for keyword in keywords:
            if not isinstance(keyword, str):
                continue
            stem = get_stem(keyword)
            if stem in query_stems:
                score += 2 if stem in _LOW_VALUE_STEMS else 20

    # B) Question/title hits, counted per word of the stored question.
    question = entry.get("question", "")
    if isinstance(question, str):
        score += 15 * sum(1 for stem in _text_stems(question) if stem in query_stems)

    return score


@app.post("/search")
async def search_knowledge(request: Request):
    """
    Answer a question with the best-scoring document from KNOWLEDGE_CACHE.

    The question is read from the JSON body (top-level "query", a tool call
    under message.toolCalls[0].function.arguments, or args.query), reduced
    to word stems and compared against the stemmed keywords and question of
    every cached document. The highest-scoring document is returned if it
    reaches at least 10 points; otherwise the question is stored in the
    inbox collection and a fallback text is returned.

    Returns:
        A dict with a single "result" key holding the text to speak.
    """
    try:
        data = await request.json()
    except Exception:
        # Not a bare except: task cancellation must not be swallowed here.
        return {"result": "Fehler: Kein JSON."}

    query_text = _extract_query(data)
    print(f"🔎 FRAGE: '{query_text}'")

    if not query_text:
        return {"result": "Akustik-Fehler."}

    # --- Scoring algorithm v2 (stemming) ---
    stem_list = _text_stems(query_text)
    print(f" ⚙️ Suchstämme: {stem_list}")
    query_stems = set(stem_list)  # constant-time membership tests below

    best_doc = None
    best_score = 0

    for entry in KNOWLEDGE_CACHE:
        answer = entry.get("answer", "")
        # Entries without a usable answer text can never be returned.
        if not isinstance(answer, str) or len(answer) < 5:
            continue

        score = _score_entry(entry, query_stems)

        if score > best_score:
            best_score = score
            best_doc = entry

        # Debugging aid: show promising candidates in the log.
        if score > 10:
            title = str(entry.get("question", ""))[:30]
            print(f" Candidate {entry.get('id', 'unknown')}: {score} Pts (Titel: {title}...)")

    # --- Result ---
    if best_doc is not None and best_score >= 10:  # at least 10 points required
        print(f"🏆 GEWINNER: Doc {best_doc.get('id', 'unknown')} mit {best_score} Punkten.")
        return {"result": best_doc["answer"]}

    print(f"⚠️ KEIN TREFFER (Best Score: {best_score}).")
    # Record the unanswered question; a failed write is logged and must not
    # prevent the fallback answer.
    if db:
        try:
            db.collection(COLLECTION_INBOX).add({
                "question": query_text,
                "status": "open",
                "timestamp": firestore.SERVER_TIMESTAMP,
            })
        except Exception as e:
            print(f"❌ Fehler Inbox: {e}")
    return {"result": "Dazu habe ich leider keine Informationen in meiner Datenbank."}