File size: 7,475 Bytes
7606f4c
1b5d2ca
eeb5b5c
45abf5d
eeb5b5c
 
45dc038
6e26e1d
eeb5b5c
 
3d18df1
45dc038
0cfb1eb
45abf5d
db10d88
45dc038
0459685
45dc038
c106ebb
bb1daa0
 
 
eeb5b5c
 
 
bb1daa0
eeb5b5c
bb1daa0
eeb5b5c
bb1daa0
6e26e1d
a3b124a
db10d88
 
3d18df1
db10d88
 
a3b124a
bb1daa0
db10d88
a3b124a
db10d88
 
a3b124a
db10d88
 
45abf5d
3e87852
a0a7178
3e87852
45abf5d
 
 
 
3e87852
 
45abf5d
a0a7178
45abf5d
 
 
93ddfa4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0cfb1eb
a0a7178
0cfb1eb
1b5d2ca
a3b124a
93ddfa4
 
 
 
45abf5d
bbd2971
93ddfa4
 
a0a7178
 
45abf5d
a0a7178
 
 
 
 
 
 
 
 
 
45abf5d
 
a0a7178
45abf5d
a0a7178
3d18df1
bbd2971
 
45abf5d
93ddfa4
 
45abf5d
93ddfa4
 
45abf5d
 
a0a7178
 
 
 
3d18df1
45abf5d
 
 
a0a7178
45abf5d
 
 
 
a0a7178
45abf5d
 
 
a0a7178
93ddfa4
 
 
 
a0a7178
 
93ddfa4
a0a7178
 
 
45abf5d
93ddfa4
bbd2971
93ddfa4
a0a7178
bbd2971
a0a7178
bbd2971
45abf5d
bbd2971
 
 
 
 
93ddfa4
a0a7178
212e616
a0a7178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d18df1
a0a7178
 
3d18df1
a0a7178
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
from fastapi import FastAPI, Request
import json
import os
import re
import firebase_admin
from firebase_admin import credentials, firestore
from datetime import datetime

# ASGI application object; every route below is registered on it.
app = FastAPI()

# --- SETUP ---
# Firestore collection names used by the endpoints below.
COLLECTION_KNOWLEDGE = "knowledge_base"
COLLECTION_RULES = "availability_rules"
COLLECTION_INBOX = "inbox"
# In-memory copy of the knowledge collection; filled by reload_knowledge().
KNOWLEDGE_CACHE = []

# --- FIREBASE CONNECTION ---
# `db` stays None when the key is missing or initialisation fails; the rest of
# the module checks `db` for truthiness before touching Firestore.
db = None
try:
    # The environment variable holds the credentials as a JSON string.
    key = os.environ.get("FIREBASE_KEY")
    if key:
        cred = credentials.Certificate(json.loads(key))
        # NOTE(review): `_apps` is a private attribute of firebase_admin;
        # presumably checked to avoid initialising the default app twice
        # (e.g. on module reload) - confirm.
        if not firebase_admin._apps:
            firebase_admin.initialize_app(cred)
        db = firestore.client()
        print("✅ DB VERBUNDEN")
    else:
        print("❌ FEHLER: FIREBASE_KEY fehlt!")
except Exception as e:
    # Best effort: a broken key or connection is logged, not raised, so the
    # module still imports and the app can start without a database.
    print(f"❌ DB CRASH: {e}")

# --- LOAD CACHE ---
def reload_knowledge():
    """Replace ``KNOWLEDGE_CACHE`` with the current knowledge collection.

    Does nothing when no database client is available. Any error while
    reading is printed and the previous cache content is kept.
    """
    global KNOWLEDGE_CACHE
    if not db:
        return
    try:
        entries = []
        for snapshot in db.collection(COLLECTION_KNOWLEDGE).stream():
            entries.append(snapshot.to_dict())
        # Assign only after the whole stream was read successfully.
        KNOWLEDGE_CACHE = entries
        print(f"📚 {len(KNOWLEDGE_CACHE)} Einträge geladen.")
    except Exception as e:
        print(f"❌ Cache Fehler: {e}")

@app.on_event("startup")
async def startup():
    """Load the knowledge cache when the application emits its startup event."""
    # NOTE(review): recent FastAPI versions mark `on_event` as deprecated in
    # favour of lifespan handlers - confirm against the pinned version.
    reload_knowledge()

# --- HELPER: STEMMING & TOKENIZING ---
def get_stem(word):
    """Return a crude stem of *word*.

    The word is lower-cased and stripped of surrounding whitespace. The first
    suffix from the list below that matches is cut off, provided more than
    two characters remain afterwards; otherwise the normalised word is
    returned unchanged.
    """
    normalized = word.lower().strip()
    # Order matters: the first matching suffix wins.
    candidates = (
        "ungen", "innen", "keit", "sch", "ern",
        "en", "er", "es", "st", "te",
        "e", "s", "t",
    )
    matched = next(
        (
            suffix
            for suffix in candidates
            if normalized.endswith(suffix) and len(normalized) > len(suffix) + 2
        ),
        None,
    )
    if matched is None:
        return normalized
    return normalized[:-len(matched)]

def tokenize(text):
    """Split *text* into stemmed tokens.

    The text is lower-cased, every character that is neither a word
    character nor whitespace is removed, and each whitespace-separated
    token is passed through ``get_stem``.
    """
    lowered = text.lower()
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)
    stems = []
    # str.split() without arguments never yields empty strings.
    for token in without_punctuation.split():
        stems.append(get_stem(token))
    return stems

# --- HELPER: VAPI REQUEST PARSER ---
def parse_vapi_request(data):
    """Extract the tool call id and its arguments from a request payload.

    The first entry of ``data["message"]["toolCallList"]`` is used, or of
    ``data["message"]["toolCalls"]`` when the former key is absent. The
    arguments are read from ``call["function"]["arguments"]`` and decoded
    from JSON when they arrive as a string.

    Args:
        data: The decoded request body.

    Returns:
        A ``(tool_call_id, args)`` tuple. ``tool_call_id`` is ``"unknown"``
        when no id could be read. ``args`` is always a dict: malformed or
        non-object arguments yield ``{}`` so callers can use ``args.get``
        unconditionally.
    """
    tool_call_id = "unknown"
    args = {}
    try:
        msg = data.get("message", {})
        # Both keys carry the same call structure; only the key name differs.
        for list_key in ("toolCallList", "toolCalls"):
            if list_key in msg:
                call = msg[list_key][0]
                tool_call_id = call["id"]
                if "function" in call and "arguments" in call["function"]:
                    args = call["function"]["arguments"]
                break
        if isinstance(args, str):
            args = json.loads(args)
    except Exception as e:
        # Deliberately best effort: a malformed payload is logged, not raised.
        print(f"⚠️ Parsing Info: {e}")
    # A failed json.loads leaves the raw string behind, and valid JSON may
    # decode to a list or scalar; neither supports the `.get` calls callers make.
    if not isinstance(args, dict):
        args = {}
    return tool_call_id, args

# ==========================================
# TOOL: SEARCH (OPTIMIZED)
# ==========================================

# Words that carry no search intent; they are ignored and score 0 points.
_STOP_WORDS = frozenset({
    # Conversation
    "hallo", "guten", "tag", "moin", "bitte", "danke", "frage", "sagen", "kannst", "du", "mir",
    "was", "ist", "wer", "wie", "wo", "wann", "erzähl", "über", "möchte", "will", "haben",
    # Articles & filler words
    "der", "die", "das", "dem", "den", "des", "ein", "eine", "einer", "eines",
    "im", "in", "von", "zu", "bei", "mit", "für", "auf", "aus", "um", "und", "oder",
    # Generic business words (they would match everything)
    "anbieten", "machen", "tun", "geben", "helfen", "unterstützen", "bieten",
    # "plattform" is a stop word so that "Kosten der Plattform" does not end
    # up at the "Plattform-Feature" entry.
    "firma", "unternehmen", "gmbh", "produkt", "system", "plattform",
})

# Query tokens are stemmed before they are filtered, so the filter must also
# know the stemmed form of every stop word (e.g. "bitte" is tokenized to "bit").
_STOP_STEMS = _STOP_WORDS | frozenset(get_stem(w) for w in _STOP_WORDS)


def _relevant_stems(query):
    """Return the stemmed query tokens that are allowed to score."""
    return [w for w in tokenize(str(query)) if w not in _STOP_STEMS and len(w) > 2]


def _find_best_match(query_stems):
    """Return ``(doc, score)`` of the best-scoring cached entry, or ``(None, 0)``."""
    best_doc = None
    best_score = 0

    for doc in KNOWLEDGE_CACHE:
        score = 0
        hits = []

        # `or` guards against fields that exist but are stored as None.
        doc_keywords = {get_stem(str(k)) for k in doc.get("keywords") or []}
        title = str(doc.get("question") or "")
        doc_title_stems = set(tokenize(title))

        for q_stem in query_stems:
            # 1. Keyword match (the "router") -> 100 points
            if q_stem in doc_keywords:
                score += 100
                hits.append(f"KEYWORD '{q_stem}'")
            # 2. Title match -> 50 points
            elif q_stem in doc_title_stems:
                score += 50
                hits.append(f"TITLE '{q_stem}'")
            # The answer body is not scored, to avoid accidental hits.

        # Strictly greater: on a tie the earlier cache entry stays the winner.
        if score > best_score:
            best_score = score
            best_doc = doc
            print(f"   Kandidat: {score} Pkt ({hits}) -> {title[:30]}...")

    return best_doc, best_score


def _log_unanswered(query):
    """Store an unanswered query in the inbox collection (best effort)."""
    if not db:
        return
    print("📥 Ab in die Inbox.")
    try:
        db.collection(COLLECTION_INBOX).add({
            "query": query,
            "timestamp": datetime.now(),
            "status": "open"
        })
    except Exception as e:
        # A failed inbox write must not turn the tool response into an error.
        print(f"❌ Inbox Fehler: {e}")


@app.post("/search")
async def search(request: Request):
    """Answer a search tool call from the cached knowledge entries.

    The query is read from the tool call arguments (``search_query`` or
    ``query``) or, failing that, from a top-level ``search_query`` field of
    the body. It is tokenized, stop words are dropped, and every cached entry
    is scored: 100 points per token found in its keywords, otherwise 50
    points per token found in its question. The best entry's answer is
    returned if it reaches 50 points; otherwise a fixed fallback sentence is
    returned and the query is written to the inbox collection.

    Returns:
        ``{"results": [{"toolCallId": ..., "result": <answer text>}]}``
    """
    data = await request.json()
    tool_call_id, args = parse_vapi_request(data)
    if not isinstance(args, dict):
        args = {}
    query = args.get("search_query") or args.get("query")
    if not query and isinstance(data, dict):
        query = data.get("search_query")

    print(f"🔎 QUERY: '{query}'")
    answer_text = "Dazu habe ich leider keine Informationen in meiner Datenbank."

    if query:
        # 1. Clean up the query
        query_stems = _relevant_stems(query)
        print(f"🧐 Relevante Tokens: {query_stems}")

        found = False

        if query_stems:
            best_doc, best_score = _find_best_match(query_stems)

            # THRESHOLD: 50 points
            # At least one title hit (50) or keyword hit (100) is required.
            if best_doc and best_score >= 50:
                print(f"🏆 GEWINNER ({best_score} Pkt): {best_doc.get('question')}")
                answer = best_doc.get("answer")
                # An entry without an answer keeps the fallback text and is
                # treated as unanswered so it still reaches the inbox.
                if answer:
                    answer_text = answer
                    found = True
            else:
                print(f"⚠️ Kein Treffer (Max Score: {best_score})")

        # --- INBOX ---
        if not found:
            _log_unanswered(query)

    return {"results": [{"toolCallId": tool_call_id, "result": answer_text}]}

# --- OTHER ENDPOINTS ---
@app.post("/check_availability")
async def check_availability(request: Request):
    """Report whether an active availability rule covers today.

    Streams the rules whose ``active`` field is true and stops at the first
    one whose ``start_date``/``end_date`` range contains today's date. A rule
    whose name contains "ferien" yields status ``"limited"``, any other rule
    ``"unavailable"``. Without a matching rule, without a database client, or
    when reading the rules fails, the default ``"available"`` is returned.

    Returns:
        ``{"results": [{"toolCallId": ..., "result": {"status": ...,
        "instruction": ...}}]}``
    """
    data = await request.json()
    tool_call_id, _ = parse_vapi_request(data)
    # NOTE(review): naive local server time - confirm this matches the
    # timezone the rules are written for.
    today = datetime.now().strftime("%Y-%m-%d")
    status, instruction = "available", "Normal arbeiten"
    if db:
        try:
            rules = db.collection(COLLECTION_RULES).where("active", "==", True).stream()
            for r in rules:
                rd = r.to_dict() or {}
                start, end = rd.get("start_date"), rd.get("end_date")
                # Assumes dates are stored as "YYYY-MM-DD" strings so that
                # string comparison is chronological - TODO confirm. Rules
                # with missing or non-string dates are skipped instead of
                # raising TypeError on the comparison.
                if not (isinstance(start, str) and isinstance(end, str)):
                    continue
                if start <= today <= end:
                    rule_name = str(rd.get("name") or "").lower()
                    status = "limited" if "ferien" in rule_name else "unavailable"
                    instruction = rd.get("instruction_text")
                    break
        except Exception as e:
            # Best effort, like the other database access in this module:
            # log the failure and answer with the defaults.
            print(f"❌ Regel Fehler: {e}")
    return {"results": [{"toolCallId": tool_call_id, "result": {"status": status, "instruction": instruction}}]}

@app.post("/vapi-incoming")
async def dummy_incoming(request: Request):
    """Acknowledge the incoming POST without inspecting its content."""
    acknowledgement = {"status": "ok"}
    return acknowledgement
@app.get("/")
def home():
    """Return a static status payload for the root path."""
    payload = {"status": "Online"}
    return payload