File size: 5,223 Bytes
152677c
 
 
 
 
 
 
 
0d4c85e
152677c
 
 
 
 
 
 
 
 
 
 
3dc7924
152677c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
from fastapi import FastAPI
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import uuid
from huggingface_hub import InferenceClient
import os
from docx import Document
import google.generativeai as genai

# --- 0. Config ---
# Gemini API key must come from the environment; fail fast at import time
# so the service never starts half-configured.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise RuntimeError("GEMINI_API_KEY is not set in environment.")

# Configure the SDK
genai.configure(api_key=GEMINI_API_KEY)

# Choose the model
MODEL_NAME = "gemini-2.5-flash-lite"
LLM = genai.GenerativeModel(MODEL_NAME)

app = FastAPI()

# -----------------------------
# 1. SETUP: Embeddings + LLM
# -----------------------------

# Sentence-embedding model shared by ingest and query-time search.
# NOTE(review): loads at import time — first boot downloads model weights.
EMBED_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# -----------------------------
# 2. SETUP: ChromaDB
# -----------------------------

# Persistent on-disk vector store; data in ./chroma_db survives restarts.
chroma_client = chromadb.PersistentClient(path="./chroma_db")
collection = chroma_client.get_or_create_collection(name="knowledge_base")
 
# -----------------------------
# Helper: Extract text from docx
# -----------------------------
 
def extract_docx_text(file_path):
    """Return all paragraph text of a .docx file, newline-joined."""
    document = Document(file_path)
    lines = [paragraph.text for paragraph in document.paragraphs]
    return "\n".join(lines)
 
# -----------------------------
# 3. STARTUP INGEST
# -----------------------------
 
@app.on_event("startup")
def ingest_documents():
    """Populate the Chroma collection from ./documents/*.docx on first boot.

    Runs once at app startup; skipped entirely when the collection already
    holds data, so restarts do not duplicate chunks.
    """
    print("Checking if KB already has data...")

    if collection.count() > 0:
        print("KB exists. Skipping ingest.")
        return

    print("Empty KB. Ingesting files...")

    docs_dir = "./documents"
    # Robustness fix: a missing directory used to raise FileNotFoundError
    # and abort application startup.
    if not os.path.isdir(docs_dir):
        print("Directory ./documents not found. Nothing to ingest.")
        return

    for fname in os.listdir(docs_dir):
        if not fname.endswith(".docx"):
            continue
        text = extract_docx_text(os.path.join(docs_dir, fname))

        chunks = text.split("\n\n")  # simple chunking for beginners

        for chunk in chunks:
            # Skip headings / tiny fragments that carry no real content.
            if len(chunk.strip()) < 50:
                continue

            embedding = EMBED_MODEL.encode(chunk).tolist()
            collection.add(
                ids=[str(uuid.uuid4())],
                embeddings=[embedding],
                documents=[chunk],
                metadatas=[{"source": fname}],
            )

    print("Ingest complete.")
 
# -----------------------------
# 4. LLM for Intent detection
# -----------------------------
 
def get_intent(query):
    """Ask the LLM to classify *query* into one warehouse intent label.

    Returns the stripped label string (e.g. "picking"). The model is told to
    answer with the label only; output is capped at a few tokens.
    """
    prompt = f"""
Classify the user's intent from the list:
 
- receiving
- inventory_adjustment
- update_footprint
- picking
- shipping
- trailer_close
 
User query: "{query}"
 
Respond ONLY with the intent label.
"""

    # BUG FIX: genai.GenerativeModel has no text_generation() method (that is
    # the huggingface_hub InferenceClient API). Use generate_content() and
    # read the .text of the response.
    resp = LLM.generate_content(
        prompt,
        generation_config={"max_output_tokens": 10},
    )
    return resp.text.strip()
 
# -----------------------------
# 5. Hybrid Search (vector + keyword)
# -----------------------------
 
def hybrid_search(query, intent, top_k=3):
    """Vector search over the KB with a small keyword boost for the intent.

    Returns a list of (document_text, score) pairs sorted by score,
    highest first. Score is (1 - distance), plus 0.05 when the intent
    phrase appears verbatim in the document text.
    """
    # Vector search
    query_vec = EMBED_MODEL.encode(query).tolist()
    hits = collection.query(query_embeddings=[query_vec], n_results=top_k)

    documents = hits["documents"][0]
    distances = hits["distances"][0]

    # Simple keyword boost: match e.g. "inventory adjustment" in the text.
    intent_phrase = intent.replace("_", " ")

    scored = []
    for doc, dist in zip(documents, distances):
        score = 1 - dist  # convert distance to similarity
        if intent_phrase in doc.lower():
            score += 0.05
        scored.append((doc, score))

    return sorted(scored, key=lambda pair: pair[1], reverse=True)
 
# -----------------------------
# 6. LLM Format (rephrase KB)
# -----------------------------
 
def format_with_llm(answer):
    """Rephrase a raw KB answer politely via the LLM; returns plain text."""
    prompt = f"""
Rewrite this answer clearly and politely without adding new information:
 
{answer}
"""
    # BUG FIX: genai.GenerativeModel has no text_generation() method.
    # generate_content() is the correct API; .text yields the string reply.
    resp = LLM.generate_content(
        prompt,
        generation_config={"max_output_tokens": 150},
    )
    return resp.text
 
# -----------------------------
# 7. RAG Fallback
# -----------------------------
 
def rag_fallback(query, docs):
    """Answer *query* using only the retrieved *docs* as grounding context.

    *docs* is the (text, score) list from hybrid_search. Returns the model's
    answer as a string; the model is instructed to say "not found" when the
    context does not contain the answer (the caller checks for that marker).
    """
    context = "\n\n".join([d for d, _ in docs])
    prompt = f"""
Use ONLY the information below to answer the question.
If the answer is not found, say "not found".
 
Context:
{context}
 
Question: {query}
Answer:
"""
    # BUG FIX: genai.GenerativeModel has no text_generation() method. The
    # caller runs .lower()/.split() on the result, so return .text (a str),
    # not the raw response object.
    resp = LLM.generate_content(
        prompt,
        generation_config={"max_output_tokens": 200},
    )
    return resp.text
 
# -----------------------------
# 8. INCIDENT NUMBER GENERATOR
# -----------------------------
 
def generate_incident():
    """Create a pseudo-random incident reference such as 'INC1A2B3C4D'."""
    token = uuid.uuid4().hex[:8].upper()
    return f"INC{token}"
 
# -----------------------------
# 9. MAIN CHAT ENDPOINT
# -----------------------------
 
@app.post("/chat")
def chat(query: str):
    """Main chat flow: intent → hybrid search → direct / RAG / incident.

    *query* arrives as a FastAPI query parameter. Responses always include
    the detected intent; the shape of the rest depends on which path
    resolved the question.
    """
    # Step 2: Detect intent
    intent = get_intent(query)

    # Step 3–4: Hybrid search
    docs = hybrid_search(query, intent)

    # Robustness fix: an empty KB / empty result set used to raise
    # IndexError on docs[0]. Fall straight through to an incident instead.
    if not docs:
        incident = generate_incident()
        return {
            "answer": f"I couldn't find this information. I've created incident {incident}.",
            "incident": incident,
            "intent": intent
        }

    top_answer, top_score = docs[0]

    # Step 5: High confidence (≥ 0.89) — rephrase the KB text directly
    if top_score >= 0.89:
        reply = format_with_llm(top_answer)
        return {"answer": reply, "intent": intent, "confidence": top_score}

    # Step 6: RAG fallback over the retrieved context
    rag_answer = rag_fallback(query, docs)

    # Accept the RAG answer only if it is substantive and not a "not found".
    if "not found" not in rag_answer.lower() and len(rag_answer.split()) > 5:
        return {"answer": rag_answer, "intent": intent, "mode": "RAG"}

    # Step 7: Still not resolved → create incident
    incident = generate_incident()
    return {
        "answer": f"I couldn't find this information. I've created incident {incident}.",
        "incident": incident,
        "intent": intent
    }