Jaita commited on
Commit
152677c
·
verified ·
1 Parent(s): 7c7feac

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +198 -0
main.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import uuid

import chromadb
import google.generativeai as genai  # BUG FIX: genai was used below but never imported
from chromadb.config import Settings
from docx import Document
from fastapi import FastAPI
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer

# --- 0. Config ---
# The Gemini API key must be provided via the environment; fail fast otherwise.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise RuntimeError("GEMINI_API_KEY is not set in environment.")

# Configure the SDK
genai.configure(api_key=GEMINI_API_KEY)

# Choose the model used for intent detection, rephrasing, and RAG answers.
MODEL_NAME = "gemini-2.5-flash-lite"
model = genai.GenerativeModel(MODEL_NAME)

app = FastAPI()

# -----------------------------
# 1. SETUP: Embeddings + LLM
# -----------------------------

EMBED_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# -----------------------------
# 2. SETUP: ChromaDB
# -----------------------------

# Persistent on-disk store so the ingest survives restarts.
chroma_client = chromadb.PersistentClient(path="./chroma_db")
collection = chroma_client.get_or_create_collection(name="knowledge_base")
36
+
37
+ # -----------------------------
38
+ # Helper: Extract text from docx
39
+ # -----------------------------
40
+
41
def extract_docx_text(file_path):
    """Read a .docx file and return all paragraph text joined by newlines."""
    paragraphs = Document(file_path).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)
44
+
45
+ # -----------------------------
46
+ # 3. STARTUP INGEST
47
+ # -----------------------------
48
+
49
@app.on_event("startup")
def ingest_documents():
    """One-time ingest: chunk every .docx under ./documents into the vector store.

    Skips all work when the persistent collection already has entries, so
    restarts do not duplicate data.
    """
    print("Checking if KB already has data...")

    if collection.count() > 0:
        print("KB exists. Skipping ingest.")
        return

    print("Empty KB. Ingesting files...")

    docs_dir = "./documents"
    if not os.path.isdir(docs_dir):
        # Robustness fix: without this guard os.listdir raised
        # FileNotFoundError and aborted app startup entirely.
        print(f"Directory {docs_dir} not found. Nothing to ingest.")
        return

    for fname in os.listdir(docs_dir):
        if not fname.endswith(".docx"):
            continue

        text = extract_docx_text(f"./documents/{fname}")

        # Simple paragraph-gap chunking; drop fragments shorter than 50 chars.
        chunks = [c for c in text.split("\n\n") if len(c.strip()) >= 50]
        if not chunks:
            continue

        # Perf fix: one batched encode + one collection.add per file instead
        # of one model call and one DB write per chunk.
        embeddings = EMBED_MODEL.encode(chunks).tolist()
        collection.add(
            ids=[str(uuid.uuid4()) for _ in chunks],
            embeddings=embeddings,
            documents=chunks,
            metadatas=[{"source": fname} for _ in chunks],
        )

    print("Ingest complete.")
78
+
79
+ # -----------------------------
80
+ # 4. LLM for Intent detection
81
+ # -----------------------------
82
+
83
def get_intent(query):
    """Classify *query* into one of the known warehouse intents.

    Returns the bare intent label string (e.g. "picking") as produced by the
    Gemini model; whitespace is stripped.
    """
    prompt = f"""
Classify the user's intent from the list:

- receiving
- inventory_adjustment
- update_footprint
- picking
- shipping
- trailer_close

User query: "{query}"

Respond ONLY with the intent label.
"""
    # BUG FIX: original called LLM.text_generation(...) but `LLM` was never
    # defined anywhere in this module (NameError at runtime). Use the
    # configured Gemini `model` instead; `.text` yields the generated string.
    resp = model.generate_content(prompt)
    return resp.text.strip()
101
+
102
+ # -----------------------------
103
+ # 5. Hybrid Search (vector + keyword)
104
+ # -----------------------------
105
+
106
def hybrid_search(query, intent, top_k=3):
    """Vector search over the KB with a small keyword boost for the intent.

    Returns a list of (document_text, score) pairs sorted best-first, where
    score is (1 - distance) plus 0.05 when the intent words appear in the text.
    """
    # Vector search
    query_vec = EMBED_MODEL.encode(query).tolist()
    hits = collection.query(query_embeddings=[query_vec], n_results=top_k)

    documents = hits["documents"][0]
    distances = hits["distances"][0]

    # Distance -> similarity, plus a simple keyword boost.
    intent_phrase = intent.replace("_", " ")
    scored = []
    for doc_text, dist in zip(documents, distances):
        score = 1 - dist
        if intent_phrase in doc_text.lower():
            score += 0.05
        scored.append((doc_text, score))

    return sorted(scored, key=lambda pair: pair[1], reverse=True)
129
+
130
+ # -----------------------------
131
+ # 6. LLM Format (rephrase KB)
132
+ # -----------------------------
133
+
134
def format_with_llm(answer):
    """Rephrase a KB answer clearly and politely without adding information."""
    prompt = f"""
Rewrite this answer clearly and politely without adding new information:

{answer}
"""
    # BUG FIX: `LLM` was never defined in this module; use the configured
    # Gemini `model` and return its generated text.
    return model.generate_content(prompt).text
141
+
142
+ # -----------------------------
143
+ # 7. RAG Fallback
144
+ # -----------------------------
145
+
146
def rag_fallback(query, docs):
    """Answer *query* using only the retrieved (text, score) pairs in *docs*.

    The prompt instructs the model to reply "not found" when the context does
    not contain the answer; `chat` relies on that sentinel.
    """
    context = "\n\n".join(text for text, _ in docs)
    prompt = f"""
Use ONLY the information below to answer the question.
If the answer is not found, say "not found".

Context:
{context}

Question: {query}
Answer:
"""
    # BUG FIX: `LLM` was never defined in this module; use the configured
    # Gemini `model` and return its generated text.
    return model.generate_content(prompt).text
159
+
160
+ # -----------------------------
161
+ # 8. INCIDENT NUMBER GENERATOR
162
+ # -----------------------------
163
+
164
def generate_incident():
    """Return a pseudo-random incident id of the form ``INC`` + 8 hex chars."""
    token = uuid.uuid4().hex[:8].upper()
    return f"INC{token}"
166
+
167
+ # -----------------------------
168
+ # 9. MAIN CHAT ENDPOINT
169
+ # -----------------------------
170
+
171
@app.post("/chat")
def chat(query: str):
    """Main chat flow: intent -> hybrid search -> direct answer / RAG / incident."""
    # Step 2: Detect intent
    intent = get_intent(query)

    # Step 3-4: Hybrid search
    docs = hybrid_search(query, intent)

    # BUG FIX: with an empty KB (or no matches) `docs[0]` raised IndexError
    # and the endpoint returned a 500; fall through to incident creation.
    if not docs:
        incident = generate_incident()
        return {
            "answer": f"I couldn't find this information. I've created incident {incident}.",
            "incident": incident,
            "intent": intent
        }

    top_answer, top_score = docs[0]

    # Step 5: High confidence (>= 0.89): return the KB text, lightly rephrased.
    if top_score >= 0.89:
        reply = format_with_llm(top_answer)
        return {"answer": reply, "intent": intent, "confidence": top_score}

    # Step 6: RAG fallback over the retrieved context.
    rag_answer = rag_fallback(query, docs)

    # Accept the RAG answer only if it is substantive and not the "not found"
    # sentinel emitted by rag_fallback's prompt.
    if "not found" not in rag_answer.lower() and len(rag_answer.split()) > 5:
        return {"answer": rag_answer, "intent": intent, "mode": "RAG"}

    # Step 7: Still not resolved -> create incident
    incident = generate_incident()
    return {
        "answer": f"I couldn't find this information. I've created incident {incident}.",
        "incident": incident,
        "intent": intent
    }
+ }