Spaces:
Paused
Paused
Upload main.py
Browse files
main.py
CHANGED
|
@@ -101,11 +101,11 @@ async def search_knowledge(request: Request):
|
|
| 101 |
|
| 102 |
if not query_text: return {"result": "Akustik-Fehler."}
|
| 103 |
|
| 104 |
-
# --- SCORING ALGORITHMUS
|
| 105 |
best_doc = None
|
| 106 |
best_score = 0
|
| 107 |
|
| 108 |
-
# 1. Query vorbereiten
|
| 109 |
query_words_raw = query_text.lower().replace("?", "").replace(".", "").split()
|
| 110 |
query_stems = [get_stem(w) for w in query_words_raw if len(w) > 2]
|
| 111 |
|
|
@@ -115,26 +115,21 @@ async def search_knowledge(request: Request):
|
|
| 115 |
for entry in KNOWLEDGE_CACHE:
|
| 116 |
score = 0
|
| 117 |
doc_id = entry.get('id', 'unknown')
|
| 118 |
-
|
| 119 |
-
# Daten holen
|
| 120 |
t_answer = entry.get("answer", "")
|
| 121 |
t_question = entry.get("question", "")
|
| 122 |
t_keywords = entry.get("keywords", [])
|
| 123 |
|
| 124 |
if not t_answer or len(t_answer) < 5: continue
|
| 125 |
|
| 126 |
-
# A) Keyword Treffer
|
| 127 |
if isinstance(t_keywords, list):
|
| 128 |
for k in t_keywords:
|
| 129 |
k_stem = get_stem(k)
|
| 130 |
if k_stem in query_stems:
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
score += 2
|
| 134 |
-
else:
|
| 135 |
-
score += 20 # Harter Keyword Treffer ist viel wert!
|
| 136 |
|
| 137 |
-
# B) Frage/Titel Treffer
|
| 138 |
if t_question:
|
| 139 |
q_words = t_question.lower().replace("?", "").split()
|
| 140 |
for qw in q_words:
|
|
@@ -143,27 +138,51 @@ async def search_knowledge(request: Request):
|
|
| 143 |
if qw_stem in query_stems:
|
| 144 |
score += 15
|
| 145 |
|
| 146 |
-
# Neuer Rekord?
|
| 147 |
if score > best_score:
|
| 148 |
best_score = score
|
| 149 |
best_doc = entry
|
| 150 |
-
#
|
| 151 |
if score > 10:
|
| 152 |
print(f" Candidate {doc_id}: {score} Pts (Titel: {t_question[:30]}...)")
|
| 153 |
|
| 154 |
-
# --- ERGEBNIS ---
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
print(f"🏆 GEWINNER: Doc {best_doc['id']} mit {best_score} Punkten.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
return {"result": best_doc['answer']}
|
|
|
|
| 158 |
else:
|
| 159 |
print(f"⚠️ KEIN TREFFER (Best Score: {best_score}).")
|
| 160 |
-
# Inbox Eintrag
|
| 161 |
if db:
|
| 162 |
try:
|
| 163 |
db.collection(COLLECTION_INBOX).add({
|
| 164 |
"question": query_text,
|
| 165 |
"status": "open",
|
| 166 |
-
"timestamp": firestore.SERVER_TIMESTAMP
|
|
|
|
| 167 |
})
|
| 168 |
except: pass
|
| 169 |
return {"result": "Dazu habe ich leider keine Informationen in meiner Datenbank."}
|
|
|
|
| 101 |
|
| 102 |
if not query_text: return {"result": "Akustik-Fehler."}
|
| 103 |
|
| 104 |
+
# --- SCORING ALGORITHMUS V3 (Mit "Low Confidence" Inbox) ---
|
| 105 |
best_doc = None
|
| 106 |
best_score = 0
|
| 107 |
|
| 108 |
+
# 1. Query vorbereiten
|
| 109 |
query_words_raw = query_text.lower().replace("?", "").replace(".", "").split()
|
| 110 |
query_stems = [get_stem(w) for w in query_words_raw if len(w) > 2]
|
| 111 |
|
|
|
|
| 115 |
for entry in KNOWLEDGE_CACHE:
|
| 116 |
score = 0
|
| 117 |
doc_id = entry.get('id', 'unknown')
|
|
|
|
|
|
|
| 118 |
t_answer = entry.get("answer", "")
|
| 119 |
t_question = entry.get("question", "")
|
| 120 |
t_keywords = entry.get("keywords", [])
|
| 121 |
|
| 122 |
if not t_answer or len(t_answer) < 5: continue
|
| 123 |
|
| 124 |
+
# A) Keyword Treffer
|
| 125 |
if isinstance(t_keywords, list):
|
| 126 |
for k in t_keywords:
|
| 127 |
k_stem = get_stem(k)
|
| 128 |
if k_stem in query_stems:
|
| 129 |
+
if k_stem in ['udo', 'capaneo']: score += 2
|
| 130 |
+
else: score += 20
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
+
# B) Frage/Titel Treffer
|
| 133 |
if t_question:
|
| 134 |
q_words = t_question.lower().replace("?", "").split()
|
| 135 |
for qw in q_words:
|
|
|
|
| 138 |
if qw_stem in query_stems:
|
| 139 |
score += 15
|
| 140 |
|
|
|
|
| 141 |
if score > best_score:
|
| 142 |
best_score = score
|
| 143 |
best_doc = entry
|
| 144 |
+
# DEBUG: Zeig uns Kandidaten im Log
|
| 145 |
if score > 10:
|
| 146 |
print(f" Candidate {doc_id}: {score} Pts (Titel: {t_question[:30]}...)")
|
| 147 |
|
| 148 |
+
# --- ERGEBNIS LOGIK ---
|
| 149 |
+
|
| 150 |
+
# Schwelle für "Ich bin mir sicher"
|
| 151 |
+
CONFIDENCE_THRESHOLD = 70
|
| 152 |
+
|
| 153 |
+
if best_doc and best_score >= 10:
|
| 154 |
print(f"🏆 GEWINNER: Doc {best_doc['id']} mit {best_score} Punkten.")
|
| 155 |
+
|
| 156 |
+
# DEBUG: Zeig uns die ersten 100 Zeichen der Antwort im Log!
|
| 157 |
+
preview = best_doc['answer'][:100].replace("\n", " ")
|
| 158 |
+
print(f"📤 SENDE AN VAPI: '{preview}...'")
|
| 159 |
+
|
| 160 |
+
# LOGIK: Wenn der Score "naja" ist (zwischen 10 und 70), ab in die Inbox zur Prüfung!
|
| 161 |
+
if best_score < CONFIDENCE_THRESHOLD:
|
| 162 |
+
print(f"⚠️ LOW CONFIDENCE ({best_score}). Schreibe Backup in Inbox...")
|
| 163 |
+
if db:
|
| 164 |
+
try:
|
| 165 |
+
db.collection(COLLECTION_INBOX).add({
|
| 166 |
+
"question": query_text,
|
| 167 |
+
"status": "review_needed", # Markierung für dich
|
| 168 |
+
"found_answer": best_doc['answer'], # Was hat er gefunden?
|
| 169 |
+
"score": best_score,
|
| 170 |
+
"timestamp": firestore.SERVER_TIMESTAMP,
|
| 171 |
+
"source": "AI Call (Low Confidence)"
|
| 172 |
+
})
|
| 173 |
+
except: pass
|
| 174 |
+
|
| 175 |
return {"result": best_doc['answer']}
|
| 176 |
+
|
| 177 |
else:
|
| 178 |
print(f"⚠️ KEIN TREFFER (Best Score: {best_score}).")
|
|
|
|
| 179 |
if db:
|
| 180 |
try:
|
| 181 |
db.collection(COLLECTION_INBOX).add({
|
| 182 |
"question": query_text,
|
| 183 |
"status": "open",
|
| 184 |
+
"timestamp": firestore.SERVER_TIMESTAMP,
|
| 185 |
+
"source": "AI Call (No Hit)"
|
| 186 |
})
|
| 187 |
except: pass
|
| 188 |
return {"result": "Dazu habe ich leider keine Informationen in meiner Datenbank."}
|