martinbrahm committed on
Commit
45abf5d
·
verified ·
1 Parent(s): bbd2971

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +61 -24
main.py CHANGED
@@ -1,6 +1,7 @@
1
  from fastapi import FastAPI, Request
2
  import json
3
  import os
 
4
  import firebase_admin
5
  from firebase_admin import credentials, firestore
6
  from datetime import datetime
@@ -10,7 +11,7 @@ app = FastAPI()
10
  # --- SETUP ---
11
  COLLECTION_KNOWLEDGE = "knowledge_base"
12
  COLLECTION_RULES = "availability_rules"
13
- COLLECTION_INBOX = "inbox" # NEU: Hier landen ungelöste Fragen
14
  KNOWLEDGE_CACHE = []
15
 
16
  # --- FIREBASE VERBINDUNG ---
@@ -43,12 +44,21 @@ def reload_knowledge():
43
  async def startup():
44
  reload_knowledge()
45
 
 
46
  def get_stem(word):
 
47
  w = word.lower().strip()
48
- for end in ["ern", "en", "er", "es", "st", "te", "e", "s", "t"]:
49
- if w.endswith(end) and len(w) > len(end)+2: return w[:-len(end)]
 
 
50
  return w
51
 
 
 
 
 
 
52
  # --- HELPER: VAPI REQUEST PARSER ---
53
  def parse_vapi_request(data):
54
  tool_call_id = "unknown"
@@ -90,7 +100,6 @@ async def check_availability(request: Request):
90
  rd = r.to_dict()
91
  if rd.get('start_date') <= today <= rd.get('end_date'):
92
  print(f"🛑 REGEL AKTIV: {rd.get('name')}")
93
- # Einfache Logik: Wenn "Ferien" im Namen -> Limited, sonst Unavailable
94
  if "ferien" in rd.get('name', '').lower():
95
  status = "limited"
96
  else:
@@ -105,7 +114,7 @@ async def check_availability(request: Request):
105
  }
106
 
107
  # ==========================================
108
- # TOOL 2: SUCHE (Mit Inbox-Speicherung!)
109
  # ==========================================
110
  @app.post("/search")
111
  async def search(request: Request):
@@ -113,47 +122,75 @@ async def search(request: Request):
113
  tool_call_id, args = parse_vapi_request(data)
114
  query = args.get("search_query") or args.get("query") or data.get("search_query")
115
 
116
- print(f"🔎 FRAGE (ID: {tool_call_id}): '{query}'")
117
 
118
  answer_text = "Dazu habe ich leider keine Informationen in meiner Datenbank."
119
 
120
  if query:
121
- STOP_WORDS = ["hallo", "guten", "tag", "moin", "bitte", "danke", "frage"]
122
- q_words = [get_stem(w) for w in query.lower().split() if len(w)>2]
123
- relevant_words = [w for w in q_words if w not in STOP_WORDS]
 
 
 
 
 
 
 
124
 
125
  found = False
126
 
127
- if relevant_words:
128
  best_doc = None
129
  best_score = 0
 
130
  for doc in KNOWLEDGE_CACHE:
131
  score = 0
132
- title = doc.get("question", "").lower()
133
- content = doc.get("answer", "").lower()
134
- keywords = [k.lower() for k in doc.get("keywords", [])]
 
 
 
135
 
136
- for word in relevant_words:
137
- if word in title: score += 50
138
- for k in keywords:
139
- if get_stem(k) == get_stem(word): score += 30
140
- if word in content: score += 5
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  if score > best_score:
143
  best_score = score
144
  best_doc = doc
 
 
145
 
146
- # SCHWELLE: 20 PUNKTE
147
- if best_doc and best_score >= 20:
148
- print(f"🏆 TREFFER ({best_score}): {best_doc.get('question')}")
 
 
 
 
149
  answer_text = best_doc.get("answer")
150
  found = True
151
  else:
152
- print(f"⚠️ Zu wenig Relevanz (Max: {best_score})")
153
 
154
- # --- NEU: SPEICHERN WENN NICHT GEFUNDEN ---
155
  if not found and db:
156
- print("📥 Speichere in Inbox...")
157
  db.collection(COLLECTION_INBOX).add({
158
  "query": query,
159
  "timestamp": datetime.now(),
 
1
  from fastapi import FastAPI, Request
2
  import json
3
  import os
4
+ import re
5
  import firebase_admin
6
  from firebase_admin import credentials, firestore
7
  from datetime import datetime
 
11
  # --- SETUP ---
12
  COLLECTION_KNOWLEDGE = "knowledge_base"
13
  COLLECTION_RULES = "availability_rules"
14
+ COLLECTION_INBOX = "inbox"
15
  KNOWLEDGE_CACHE = []
16
 
17
  # --- FIREBASE VERBINDUNG ---
 
44
  async def startup():
45
  reload_knowledge()
46
 
47
# --- HELPER: STEMMING & TOKENIZING ---
def get_stem(word):
    """Return a crude stem of ``word`` by stripping one common German suffix.

    The word is lower-cased and stripped of surrounding whitespace first.
    Suffixes are tried in the listed order and the first usable match wins.
    A suffix is only removed when more than two characters remain afterwards;
    otherwise the next suffix in the list is tried. If nothing applies, the
    normalized word is returned unchanged.
    """
    normalized = word.lower().strip()
    candidates = ("ungen", "innen", "keit", "sch", "ern", "en", "er", "es", "st", "te", "e", "s", "t")
    for suffix in candidates:
        if not normalized.endswith(suffix):
            continue
        remaining = len(normalized) - len(suffix)
        # Keep very short words intact: the stem must be at least 3 chars.
        if remaining > 2:
            return normalized[:remaining]
    return normalized
56
 
57
def tokenize(text):
    """Split ``text`` into a list of stemmed, lower-cased words.

    Every character that is neither a word character nor whitespace is
    deleted (not replaced by a space), the remainder is split on whitespace,
    and each piece is passed through ``get_stem``.
    """
    without_punctuation = re.sub(r'[^\w\s]', '', text.lower())
    stems = []
    # str.split() with no argument never yields empty strings.
    for piece in without_punctuation.split():
        stems.append(get_stem(piece))
    return stems
61
+
62
  # --- HELPER: VAPI REQUEST PARSER ---
63
  def parse_vapi_request(data):
64
  tool_call_id = "unknown"
 
100
  rd = r.to_dict()
101
  if rd.get('start_date') <= today <= rd.get('end_date'):
102
  print(f"🛑 REGEL AKTIV: {rd.get('name')}")
 
103
  if "ferien" in rd.get('name', '').lower():
104
  status = "limited"
105
  else:
 
114
  }
115
 
116
  # ==========================================
117
+ # TOOL 2: SUCHE (KEYWORD ROUTING)
118
  # ==========================================
119
  @app.post("/search")
120
  async def search(request: Request):
 
122
  tool_call_id, args = parse_vapi_request(data)
123
  query = args.get("search_query") or args.get("query") or data.get("search_query")
124
 
125
+ print(f"🔎 QUERY: '{query}'")
126
 
127
  answer_text = "Dazu habe ich leider keine Informationen in meiner Datenbank."
128
 
129
  if query:
130
+ # Stop-Wörter: Diese ignorieren wir komplett, damit sie keine falschen Hits erzeugen
131
+ STOP_WORDS = [
132
+ "hallo", "guten", "tag", "moin", "bitte", "danke", "frage", "sagen",
133
+ "kannst", "du", "mir", "was", "ist", "wer", "wie", "wo", "wann",
134
+ "anbieten", "machen", "tun", "geben", "helfen", "möchte", "will", "haben"
135
+ ]
136
+
137
+ # 1. Query zerlegen
138
+ query_stems = [w for w in tokenize(query) if w not in STOP_WORDS and len(w) > 2]
139
+ print(f"🧐 Tokens: {query_stems}")
140
 
141
  found = False
142
 
143
+ if query_stems:
144
  best_doc = None
145
  best_score = 0
146
+
147
  for doc in KNOWLEDGE_CACHE:
148
  score = 0
149
+ hits = []
150
+
151
+ # Dokument zerlegen
152
+ title_stems = tokenize(doc.get("question", ""))
153
+ keyword_stems = tokenize(", ".join(doc.get("keywords", [])))
154
+ content_stems = tokenize(doc.get("answer", "")) # Inhalt ist jetzt unwichtig
155
 
156
+ for q_stem in query_stems:
157
+
158
+ # 1. KEYWORD MATCH (Der "Router") -> 100 Punkte!
159
+ if q_stem in keyword_stems:
160
+ score += 100
161
+ hits.append(f"KEYWORD '{q_stem}'")
162
+
163
+ # 2. TITEL MATCH -> 50 Punkte
164
+ elif q_stem in title_stems:
165
+ score += 50
166
+ hits.append(f"TITLE '{q_stem}'")
167
+
168
+ # 3. CONTENT MATCH -> Nur 5 Punkte (reicht alleine nicht)
169
+ elif q_stem in content_stems:
170
+ score += 5
171
+ # hits.append("content") # Loggen wir nicht, spammt sonst
172
 
173
  if score > best_score:
174
  best_score = score
175
  best_doc = doc
176
+ if score > 10:
177
+ print(f" Kandidat: {score} Pkt ({hits}) -> {doc.get('question')[:40]}...")
178
 
179
+ # SCHWELLE: 40 PUNKTE
180
+ # Ein Keyword-Treffer (100) gewinnt immer.
181
+ # Ein Titel-Treffer (50) gewinnt immer.
182
+ # Nur Fließtext (5 Pkt pro Wort) braucht mind. 8 Treffer -> Unwahrscheinlich bei falschen Dokus.
183
+
184
+ if best_doc and best_score >= 40:
185
+ print(f"🏆 GEWINNER ({best_score} Pkt): {best_doc.get('question')}")
186
  answer_text = best_doc.get("answer")
187
  found = True
188
  else:
189
+ print(f"⚠️ Kein eindeutiger Treffer (Max Score: {best_score})")
190
 
191
+ # --- INBOX LOGIK ---
192
  if not found and db:
193
+ print("📥 Ab in die Inbox.")
194
  db.collection(COLLECTION_INBOX).add({
195
  "query": query,
196
  "timestamp": datetime.now(),