martinbrahm committed on
Commit
fa850df
·
verified ·
1 Parent(s): 3e87852

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +36 -17
main.py CHANGED
@@ -101,11 +101,11 @@ async def search_knowledge(request: Request):
101
 
102
  if not query_text: return {"result": "Akustik-Fehler."}
103
 
104
- # --- SCORING ALGORITHMUS V2 (Stemming) ---
105
  best_doc = None
106
  best_score = 0
107
 
108
- # 1. Query vorbereiten (Wörter zerlegen & stämme bilden)
109
  query_words_raw = query_text.lower().replace("?", "").replace(".", "").split()
110
  query_stems = [get_stem(w) for w in query_words_raw if len(w) > 2]
111
 
@@ -115,26 +115,21 @@ async def search_knowledge(request: Request):
115
  for entry in KNOWLEDGE_CACHE:
116
  score = 0
117
  doc_id = entry.get('id', 'unknown')
118
-
119
- # Daten holen
120
  t_answer = entry.get("answer", "")
121
  t_question = entry.get("question", "")
122
  t_keywords = entry.get("keywords", [])
123
 
124
  if not t_answer or len(t_answer) < 5: continue
125
 
126
- # A) Keyword Treffer (+10 Punkte)
127
  if isinstance(t_keywords, list):
128
  for k in t_keywords:
129
  k_stem = get_stem(k)
130
  if k_stem in query_stems:
131
- # Spezial-Regel: 'udo' und 'capaneo' sind weniger wert, weil sie überall stehen
132
- if k_stem in ['udo', 'capaneo']:
133
- score += 2
134
- else:
135
- score += 20 # Harter Keyword Treffer ist viel wert!
136
 
137
- # B) Frage/Titel Treffer (+15 Punkte pro Wort)
138
  if t_question:
139
  q_words = t_question.lower().replace("?", "").split()
140
  for qw in q_words:
@@ -143,27 +138,51 @@ async def search_knowledge(request: Request):
143
  if qw_stem in query_stems:
144
  score += 15
145
 
146
- # Neuer Rekord?
147
  if score > best_score:
148
  best_score = score
149
  best_doc = entry
150
- # Debugging: Zeig uns Kandidaten im Log
151
  if score > 10:
152
  print(f" Candidate {doc_id}: {score} Pts (Titel: {t_question[:30]}...)")
153
 
154
- # --- ERGEBNIS ---
155
- if best_doc and best_score >= 10: # Mindestens 10 Punkte nötig
 
 
 
 
156
  print(f"🏆 GEWINNER: Doc {best_doc['id']} mit {best_score} Punkten.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  return {"result": best_doc['answer']}
 
158
  else:
159
  print(f"⚠️ KEIN TREFFER (Best Score: {best_score}).")
160
- # Inbox Eintrag
161
  if db:
162
  try:
163
  db.collection(COLLECTION_INBOX).add({
164
  "question": query_text,
165
  "status": "open",
166
- "timestamp": firestore.SERVER_TIMESTAMP
 
167
  })
168
  except: pass
169
  return {"result": "Dazu habe ich leider keine Informationen in meiner Datenbank."}
 
101
 
102
  if not query_text: return {"result": "Akustik-Fehler."}
103
 
104
+ # --- SCORING ALGORITHMUS V3 (Mit "Low Confidence" Inbox) ---
105
  best_doc = None
106
  best_score = 0
107
 
108
+ # 1. Query vorbereiten
109
  query_words_raw = query_text.lower().replace("?", "").replace(".", "").split()
110
  query_stems = [get_stem(w) for w in query_words_raw if len(w) > 2]
111
 
 
115
  for entry in KNOWLEDGE_CACHE:
116
  score = 0
117
  doc_id = entry.get('id', 'unknown')
 
 
118
  t_answer = entry.get("answer", "")
119
  t_question = entry.get("question", "")
120
  t_keywords = entry.get("keywords", [])
121
 
122
  if not t_answer or len(t_answer) < 5: continue
123
 
124
+ # A) Keyword Treffer
125
  if isinstance(t_keywords, list):
126
  for k in t_keywords:
127
  k_stem = get_stem(k)
128
  if k_stem in query_stems:
129
+ if k_stem in ['udo', 'capaneo']: score += 2
130
+ else: score += 20
 
 
 
131
 
132
+ # B) Frage/Titel Treffer
133
  if t_question:
134
  q_words = t_question.lower().replace("?", "").split()
135
  for qw in q_words:
 
138
  if qw_stem in query_stems:
139
  score += 15
140
 
 
141
  if score > best_score:
142
  best_score = score
143
  best_doc = entry
144
+ # DEBUG: Zeig uns Kandidaten im Log
145
  if score > 10:
146
  print(f" Candidate {doc_id}: {score} Pts (Titel: {t_question[:30]}...)")
147
 
148
+ # --- ERGEBNIS LOGIK ---
149
+
150
+ # Schwelle für "Ich bin mir sicher"
151
+ CONFIDENCE_THRESHOLD = 70
152
+
153
+ if best_doc and best_score >= 10:
154
  print(f"🏆 GEWINNER: Doc {best_doc['id']} mit {best_score} Punkten.")
155
+
156
+ # DEBUG: Zeig uns die ersten 100 Zeichen der Antwort im Log!
157
+ preview = best_doc['answer'][:100].replace("\n", " ")
158
+ print(f"📤 SENDE AN VAPI: '{preview}...'")
159
+
160
+ # LOGIK: Wenn der Score "naja" ist (zwischen 10 und 70), ab in die Inbox zur Prüfung!
161
+ if best_score < CONFIDENCE_THRESHOLD:
162
+ print(f"⚠️ LOW CONFIDENCE ({best_score}). Schreibe Backup in Inbox...")
163
+ if db:
164
+ try:
165
+ db.collection(COLLECTION_INBOX).add({
166
+ "question": query_text,
167
+ "status": "review_needed", # Markierung für dich
168
+ "found_answer": best_doc['answer'], # Was hat er gefunden?
169
+ "score": best_score,
170
+ "timestamp": firestore.SERVER_TIMESTAMP,
171
+ "source": "AI Call (Low Confidence)"
172
+ })
173
+ except: pass
174
+
175
  return {"result": best_doc['answer']}
176
+
177
  else:
178
  print(f"⚠️ KEIN TREFFER (Best Score: {best_score}).")
 
179
  if db:
180
  try:
181
  db.collection(COLLECTION_INBOX).add({
182
  "question": query_text,
183
  "status": "open",
184
+ "timestamp": firestore.SERVER_TIMESTAMP,
185
+ "source": "AI Call (No Hit)"
186
  })
187
  except: pass
188
  return {"result": "Dazu habe ich leider keine Informationen in meiner Datenbank."}