martinbrahm committed on
Commit
a0a7178
·
verified ·
1 Parent(s): 45abf5d

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +51 -70
main.py CHANGED
@@ -46,7 +46,7 @@ async def startup():
46
 
47
  # --- HELPER: STEMMING & TOKENIZING ---
48
  def get_stem(word):
49
- # Einfaches Stemming (Wortstamm finden)
50
  w = word.lower().strip()
51
  suffixes = ["ungen", "innen", "keit", "sch", "ern", "en", "er", "es", "st", "te", "e", "s", "t"]
52
  for end in suffixes:
@@ -55,7 +55,7 @@ def get_stem(word):
55
  return w
56
 
57
  def tokenize(text):
58
- # Zerlegt Text in saubere Wort-Liste (ohne Punkt/Komma)
59
  clean_text = re.sub(r'[^\w\s]', '', text.lower())
60
  return [get_stem(w) for w in clean_text.split() if w]
61
 
@@ -82,39 +82,7 @@ def parse_vapi_request(data):
82
  return tool_call_id, args
83
 
84
  # ==========================================
85
- # TOOL 1: VERFÜGBARKEIT
86
- # ==========================================
87
- @app.post("/check_availability")
88
- async def check_availability(request: Request):
89
- data = await request.json()
90
- tool_call_id, _ = parse_vapi_request(data)
91
-
92
- today = datetime.now().strftime("%Y-%m-%d")
93
- status = "available"
94
- instruction = "Normal arbeiten"
95
-
96
- try:
97
- if db:
98
- rules = db.collection(COLLECTION_RULES).where("active", "==", True).stream()
99
- for r in rules:
100
- rd = r.to_dict()
101
- if rd.get('start_date') <= today <= rd.get('end_date'):
102
- print(f"🛑 REGEL AKTIV: {rd.get('name')}")
103
- if "ferien" in rd.get('name', '').lower():
104
- status = "limited"
105
- else:
106
- status = "unavailable"
107
- instruction = rd.get('instruction_text')
108
- break
109
- except Exception as e:
110
- print(f"❌ ERROR CHECK: {e}")
111
-
112
- return {
113
- "results": [{"toolCallId": tool_call_id, "result": {"status": status, "instruction": instruction}}]
114
- }
115
-
116
- # ==========================================
117
- # TOOL 2: SUCHE (KEYWORD ROUTING)
118
  # ==========================================
119
  @app.post("/search")
120
  async def search(request: Request):
@@ -123,20 +91,27 @@ async def search(request: Request):
123
  query = args.get("search_query") or args.get("query") or data.get("search_query")
124
 
125
  print(f"🔎 QUERY: '{query}'")
126
-
127
  answer_text = "Dazu habe ich leider keine Informationen in meiner Datenbank."
128
 
129
  if query:
130
- # Stop-Wörter: Diese ignorieren wir komplett, damit sie keine falschen Hits erzeugen
 
131
  STOP_WORDS = [
132
- "hallo", "guten", "tag", "moin", "bitte", "danke", "frage", "sagen",
133
- "kannst", "du", "mir", "was", "ist", "wer", "wie", "wo", "wann",
134
- "anbieten", "machen", "tun", "geben", "helfen", "möchte", "will", "haben"
 
 
 
 
 
 
 
135
  ]
136
 
137
- # 1. Query zerlegen
138
  query_stems = [w for w in tokenize(query) if w not in STOP_WORDS and len(w) > 2]
139
- print(f"🧐 Tokens: {query_stems}")
140
 
141
  found = False
142
 
@@ -148,47 +123,41 @@ async def search(request: Request):
148
  score = 0
149
  hits = []
150
 
151
- # Dokument zerlegen
152
- title_stems = tokenize(doc.get("question", ""))
153
- keyword_stems = tokenize(", ".join(doc.get("keywords", [])))
154
- content_stems = tokenize(doc.get("answer", "")) # Inhalt ist jetzt unwichtig
155
 
156
  for q_stem in query_stems:
157
 
158
  # 1. KEYWORD MATCH (Der "Router") -> 100 Punkte!
159
- if q_stem in keyword_stems:
160
  score += 100
161
  hits.append(f"KEYWORD '{q_stem}'")
162
 
163
  # 2. TITEL MATCH -> 50 Punkte
164
- elif q_stem in title_stems:
165
  score += 50
166
  hits.append(f"TITLE '{q_stem}'")
167
 
168
- # 3. CONTENT MATCH -> Nur 5 Punkte (reicht alleine nicht)
169
- elif q_stem in content_stems:
170
- score += 5
171
- # hits.append("content") # Loggen wir nicht, spammt sonst
172
 
173
  if score > best_score:
174
  best_score = score
175
  best_doc = doc
176
- if score > 10:
177
- print(f" Kandidat: {score} Pkt ({hits}) -> {doc.get('question')[:40]}...")
178
 
179
- # SCHWELLE: 40 PUNKTE
180
- # Ein Keyword-Treffer (100) gewinnt immer.
181
- # Ein Titel-Treffer (50) gewinnt immer.
182
- # Nur Fließtext (5 Pkt pro Wort) braucht mind. 8 Treffer -> Unwahrscheinlich bei falschen Dokus.
183
-
184
- if best_doc and best_score >= 40:
185
  print(f"🏆 GEWINNER ({best_score} Pkt): {best_doc.get('question')}")
186
  answer_text = best_doc.get("answer")
187
  found = True
188
  else:
189
- print(f"⚠️ Kein eindeutiger Treffer (Max Score: {best_score})")
190
 
191
- # --- INBOX LOGIK ---
192
  if not found and db:
193
  print("📥 Ab in die Inbox.")
194
  db.collection(COLLECTION_INBOX).add({
@@ -197,14 +166,26 @@ async def search(request: Request):
197
  "status": "open"
198
  })
199
 
200
- return {
201
- "results": [{"toolCallId": tool_call_id, "result": answer_text}]
202
- }
203
 
204
- @app.post("/vapi-incoming")
205
- async def dummy_incoming(request: Request):
206
- return {"status": "ok"}
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
 
 
208
  @app.get("/")
209
- def home():
210
- return {"status": "Online", "docs": len(KNOWLEDGE_CACHE)}
 
46
 
47
  # --- HELPER: STEMMING & TOKENIZING ---
48
  def get_stem(word):
49
+ # Einfaches Stemming
50
  w = word.lower().strip()
51
  suffixes = ["ungen", "innen", "keit", "sch", "ern", "en", "er", "es", "st", "te", "e", "s", "t"]
52
  for end in suffixes:
 
55
  return w
56
 
def tokenize(text):
    """Turn free text into a list of stemmed, lower-cased word tokens.

    Every character that is neither a word character nor whitespace is
    deleted (not replaced by a space), so "e-mail" becomes the single
    word "email" before stemming. The remaining text is split on
    whitespace and each word is passed through ``get_stem``.

    Args:
        text: The text to tokenize. ``None`` or an empty string is accepted.

    Returns:
        A list of stems in input order; an empty list when ``text`` is
        ``None``, empty, or contains only punctuation/whitespace.
    """
    # Callers pass values such as doc.get("question", ""), which is None when
    # the field exists but holds a null; treat that like empty text rather
    # than failing on None.lower().
    if not text:
        return []
    clean_text = re.sub(r'[^\w\s]', '', text.lower())
    # str.split() without arguments never yields empty strings, so no extra
    # emptiness filter is needed here.
    return [get_stem(w) for w in clean_text.split()]
61
 
 
82
  return tool_call_id, args
83
 
84
  # ==========================================
85
+ # TOOL: SUCHE (OPTIMIERT)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  # ==========================================
87
  @app.post("/search")
88
  async def search(request: Request):
 
91
  query = args.get("search_query") or args.get("query") or data.get("search_query")
92
 
93
  print(f"🔎 QUERY: '{query}'")
 
94
  answer_text = "Dazu habe ich leider keine Informationen in meiner Datenbank."
95
 
96
  if query:
97
+ # --- STOP WÖRTER LISTE (MASSIV ERWEITERT) ---
98
+ # Diese Wörter werden komplett ignoriert und geben 0 Punkte.
99
  STOP_WORDS = [
100
+ # Kommunikation
101
+ "hallo", "guten", "tag", "moin", "bitte", "danke", "frage", "sagen", "kannst", "du", "mir",
102
+ "was", "ist", "wer", "wie", "wo", "wann", "erzähl", "über", "möchte", "will", "haben",
103
+ # Artikel & Füllwörter (DIE KILLER!)
104
+ "der", "die", "das", "dem", "den", "des", "ein", "eine", "einer", "eines",
105
+ "im", "in", "von", "zu", "bei", "mit", "für", "auf", "aus", "um", "und", "oder",
106
+ # Generische Business-Wörter (die alles matchen würden)
107
+ "anbieten", "machen", "tun", "geben", "helfen", "unterstützen", "bieten",
108
+ "firma", "unternehmen", "gmbh", "produkt", "system", "plattform"
109
+ # "plattform" ist hier Stop-Wort, damit "Kosten der Plattform" nicht beim "Plattform-Feature" landet!
110
  ]
111
 
112
+ # 1. Query bereinigen
113
  query_stems = [w for w in tokenize(query) if w not in STOP_WORDS and len(w) > 2]
114
+ print(f"🧐 Relevante Tokens: {query_stems}")
115
 
116
  found = False
117
 
 
123
  score = 0
124
  hits = []
125
 
126
+ # Dokument Inhalte tokenizen
127
+ # WICHTIG: Keywords zählen wir doppelt so stark, wenn sie exakt passen
128
+ doc_keywords = [get_stem(k) for k in doc.get("keywords", [])]
129
+ doc_title_stems = tokenize(doc.get("question", ""))
130
 
131
  for q_stem in query_stems:
132
 
133
  # 1. KEYWORD MATCH (Der "Router") -> 100 Punkte!
134
+ if q_stem in doc_keywords:
135
  score += 100
136
  hits.append(f"KEYWORD '{q_stem}'")
137
 
138
  # 2. TITEL MATCH -> 50 Punkte
139
+ elif q_stem in doc_title_stems:
140
  score += 50
141
  hits.append(f"TITLE '{q_stem}'")
142
 
143
+ # (Wir ignorieren den Fließtext für das Scoring, um Zufallstreffer zu vermeiden)
 
 
 
144
 
145
  if score > best_score:
146
  best_score = score
147
  best_doc = doc
148
+ if score > 0:
149
+ print(f" Kandidat: {score} Pkt ({hits}) -> {doc.get('question')[:30]}...")
150
 
151
+ # SCHWELLE: 50 PUNKTE
152
+ # Es muss mindestens ein Titel-Treffer (50) oder Keyword (100) sein.
153
+ if best_doc and best_score >= 50:
 
 
 
154
  print(f"🏆 GEWINNER ({best_score} Pkt): {best_doc.get('question')}")
155
  answer_text = best_doc.get("answer")
156
  found = True
157
  else:
158
+ print(f"⚠️ Kein Treffer (Max Score: {best_score})")
159
 
160
+ # --- INBOX ---
161
  if not found and db:
162
  print("📥 Ab in die Inbox.")
163
  db.collection(COLLECTION_INBOX).add({
 
166
  "status": "open"
167
  })
168
 
169
+ return {"results": [{"toolCallId": tool_call_id, "result": answer_text}]}
 
 
170
 
171
+ # --- ANDERE ENDPOINTS ---
@app.post("/check_availability")
async def check_availability(request: Request):
    """Report whether an availability rule is in effect today.

    Reads the tool-call id from the request payload, then scans the active
    rules in ``COLLECTION_RULES``. The first rule whose
    ``start_date <= today <= end_date`` decides the outcome:

    * ``"limited"`` when the rule name contains "ferien" (case-insensitive),
    * ``"unavailable"`` for any other matching rule.

    Without a matching rule (or without a database handle) the status stays
    ``"available"`` with the default instruction.

    Args:
        request: Incoming HTTP request whose JSON body is handed to
            ``parse_vapi_request``.

    Returns:
        ``{"results": [{"toolCallId": ..., "result": {"status": ...,
        "instruction": ...}}]}``. ``instruction`` is the matching rule's
        ``instruction_text`` (``None`` if the rule has none).
    """
    data = await request.json()
    tool_call_id, _ = parse_vapi_request(data)
    # NOTE(review): naive local time of the server — confirm this matches the
    # timezone the rule dates are authored in.
    today = datetime.now().strftime("%Y-%m-%d")
    status, instruction = "available", "Normal arbeiten"
    if db:
        # A datastore failure must not turn into an HTTP 500 for the caller;
        # fall back to the default "available" answer and log the error.
        try:
            rules = db.collection(COLLECTION_RULES).where("active", "==", True).stream()
            for r in rules:
                rd = r.to_dict() or {}
                start, end = rd.get('start_date'), rd.get('end_date')
                # Rules without both dates cannot be compared against
                # today's date string; skip them instead of raising TypeError.
                if not start or not end:
                    continue
                # assumes dates are stored as "YYYY-MM-DD" strings, so plain
                # string comparison orders them chronologically — TODO confirm
                if start <= today <= end:
                    # `or ''` also covers a name that is explicitly null.
                    name = (rd.get('name') or '').lower()
                    status = "limited" if "ferien" in name else "unavailable"
                    instruction = rd.get('instruction_text')
                    break
        except Exception as e:
            print(f"❌ ERROR CHECK: {e}")
    return {"results": [{"toolCallId": tool_call_id, "result": {"status": status, "instruction": instruction}}]}
187
 
@app.post("/vapi-incoming")
async def dummy_incoming(request: Request):
    """Acknowledge an incoming webhook call without inspecting its body."""
    acknowledgement = {"status": "ok"}
    return acknowledgement
@app.get("/")
def home():
    """Root endpoint: report that the service is online."""
    payload = {"status": "Online"}
    return payload