Pulastya0 committed on
Commit
1e4ae98
·
verified ·
1 Parent(s): 0680215

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -105
app.py CHANGED
@@ -1,6 +1,4 @@
1
  import os
2
-
3
- # SET CACHE PATHS BEFORE ANY IMPORTS
4
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
5
  os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers"
6
  os.environ["HF_HOME"] = "/tmp/huggingface"
@@ -10,32 +8,31 @@ os.environ["TORCH_HOME"] = "/tmp/torch"
10
  import json
11
  from fastapi import FastAPI, HTTPException, UploadFile, File
12
  from pydantic import BaseModel
 
13
  import chromadb
14
  from chromadb.config import Settings
15
  from sentence_transformers import SentenceTransformer
16
- import numpy as np
17
 
18
- # Import from agent_langchain
19
  from agent_langchain import (
20
- process_ticket_langchain,
21
- classify_ticket,
 
22
  call_routing,
23
  get_kb_collection,
24
- encoder
 
25
  )
26
 
27
- app = FastAPI(title="Smart Helpdesk AI Agent LangChain")
28
 
29
- # -------------------------------
30
  # Request Models
31
- # -------------------------------
32
  class TicketRequest(BaseModel):
33
  text: str
34
- user_email: str = None
 
35
 
36
- # -------------------------------
37
- # Persistent Chroma client
38
- # -------------------------------
39
  CHROMA_PATH = "/tmp/chroma"
40
  COLLECTION_NAME = "knowledge_base"
41
 
@@ -44,12 +41,8 @@ COLLECTION_NAME = "knowledge_base"
44
  # -------------------------------
45
  @app.post("/setup")
46
  async def setup_kb(kb_file: UploadFile = File(...)):
47
- """
48
- Uploads a JSON KB file (flattened), generates embeddings with SentenceTransformer,
49
- and populates a persistent ChromaDB collection.
50
- """
51
  try:
52
- # Load JSON from uploaded file
53
  content_bytes = await kb_file.read()
54
  data = json.loads(content_bytes)
55
 
@@ -58,29 +51,19 @@ async def setup_kb(kb_file: UploadFile = File(...)):
58
 
59
  print(f"📘 Loaded {len(data)} items from {kb_file.filename}")
60
 
61
- # Get or create collection using shared function
62
  chroma_client = chromadb.PersistentClient(
63
  path=CHROMA_PATH,
64
- settings=Settings(
65
- anonymized_telemetry=False,
66
- allow_reset=True
67
- )
68
  )
69
  collection = chroma_client.get_or_create_collection(COLLECTION_NAME)
70
 
71
- # Clear existing records
72
  if collection.count() > 0:
73
  print(f"🧹 Clearing {collection.count()} existing records...")
74
  collection.delete(ids=collection.get()['ids'])
75
 
76
- # Prepare texts, ids, and metadata
77
  texts, ids, metadatas = [], [], []
78
  for i, item in enumerate(data):
79
- # Try multiple possible field names for text content
80
- text = (item.get("answer") or
81
- item.get("text") or
82
- item.get("content") or "")
83
-
84
  item_id = item.get("id") or str(i)
85
  category = item.get("category", "")
86
 
@@ -88,42 +71,26 @@ async def setup_kb(kb_file: UploadFile = File(...)):
88
  print(f"⚠️ Skipping item {i} - no text content")
89
  continue
90
 
91
- # Create a richer text for embedding by combining relevant fields
92
- # This helps with better semantic search
93
- combined_text = text
94
- if category:
95
- combined_text = f"Category: {category}. {text}"
96
-
97
  texts.append(combined_text)
98
  ids.append(str(item_id))
99
- metadatas.append({
100
- "id": str(item_id),
101
- "category": category,
102
- "original_index": i
103
- })
104
 
105
  if not texts:
106
  raise HTTPException(status_code=400, detail="No valid text content found in JSON.")
107
 
108
- # Generate embeddings using the shared encoder
109
  print("🧠 Generating embeddings...")
110
  embeddings = encoder.encode(texts, show_progress_bar=True).tolist()
111
 
112
- # Add to ChromaDB
113
  print("💾 Adding to ChromaDB...")
114
- collection.add(
115
- ids=ids,
116
- embeddings=embeddings,
117
- documents=texts,
118
- metadatas=metadatas
119
- )
120
 
121
- # Update the global reference in agent_langchain
122
  import agent_langchain
123
  agent_langchain.kb_collection = collection
124
 
125
- print(f"✅ Successfully added {collection.count()} records to {COLLECTION_NAME}.")
126
- return {"message": "Knowledge base successfully initialized.", "count": collection.count()}
127
 
128
  except json.JSONDecodeError:
129
  raise HTTPException(status_code=400, detail="Invalid JSON file.")
@@ -133,39 +100,81 @@ async def setup_kb(kb_file: UploadFile = File(...)):
133
  raise HTTPException(status_code=500, detail=f"Setup failed: {str(e)}")
134
 
135
  # -------------------------------
136
- # Step-by-Step Endpoints
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  # -------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
 
 
 
139
  @app.post("/classify")
140
  async def classify_endpoint(ticket: TicketRequest):
141
- """Classify the ticket (impact, urgency, type)"""
142
  classification = classify_ticket(ticket.text)
143
  return {"classification": classification}
144
 
145
  @app.post("/route")
146
  async def route_endpoint(ticket: TicketRequest):
147
- """Route the ticket to department (Space 2)"""
148
  department = call_routing(ticket.text)
149
  return {"department": department}
150
 
151
  @app.post("/kb_query")
152
  async def kb_query_endpoint(ticket: TicketRequest):
153
- """Query the flattened KB directly using embeddings and return the best match."""
154
  collection = get_kb_collection()
155
-
156
- if not collection:
157
  raise HTTPException(status_code=400, detail="KB not set up. Call /setup first.")
158
 
159
  try:
160
- # Check if collection has data
161
- count = collection.count()
162
- if count == 0:
163
- raise HTTPException(status_code=400, detail="KB is empty. Please upload data via /setup.")
164
-
165
- # Encode query using the shared encoder
166
  query_embedding = encoder.encode([ticket.text])[0].tolist()
167
-
168
- # Query ChromaDB
169
  result = collection.query(
170
  query_embeddings=[query_embedding],
171
  n_results=1,
@@ -175,53 +184,17 @@ async def kb_query_endpoint(ticket: TicketRequest):
175
  if not result or not result.get('documents') or len(result['documents'][0]) == 0:
176
  return {"answer": "No relevant KB found.", "confidence": 0.0}
177
 
178
- # Extract best match
179
  best_doc = result['documents'][0][0]
180
  best_distance = result['distances'][0][0] if result.get('distances') else 1.0
181
-
182
- # Convert L2 distance to confidence score
183
- # For normalized embeddings, L2 distance ranges from 0 (identical) to ~2.0 (opposite)
184
  confidence = max(0.0, 1.0 - (best_distance / 2.0))
185
 
186
- return {
187
- "answer": best_doc,
188
- "confidence": round(float(confidence), 3)
189
- }
190
 
191
  except Exception as e:
192
  import traceback
193
  traceback.print_exc()
194
  raise HTTPException(status_code=500, detail=f"KB query failed: {str(e)}")
195
 
196
- # -------------------------------
197
- # Full Ticket Orchestration
198
- # -------------------------------
199
- @app.post("/orchestrate")
200
- async def orchestrate_endpoint(ticket: TicketRequest):
201
- """Full ticket orchestration via LangChain agent with nicely formatted reasoning trace"""
202
- try:
203
- result = process_ticket_langchain(ticket.text)
204
-
205
- # Format reasoning trace for readability
206
- formatted_trace = [
207
- {"step": idx + 1, "description": line}
208
- for idx, line in enumerate(result.get("reasoning_trace", []))
209
- ]
210
-
211
- response = {
212
- "status": result["status"],
213
- "classification": result["classification"],
214
- "department": result["department"],
215
- "answer": result["answer"],
216
- "reasoning_trace": formatted_trace
217
- }
218
-
219
- return response
220
- except Exception as e:
221
- import traceback
222
- traceback.print_exc()
223
- raise HTTPException(status_code=500, detail=f"Orchestration failed: {str(e)}")
224
-
225
  # -------------------------------
226
  # Health Check
227
  # -------------------------------
@@ -234,5 +207,6 @@ async def health():
234
  return {
235
  "status": "ok",
236
  "kb_status": kb_status,
237
- "kb_records": kb_count
 
238
  }
 
1
  import os
 
 
2
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
3
  os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers"
4
  os.environ["HF_HOME"] = "/tmp/huggingface"
 
8
  import json
9
  from fastapi import FastAPI, HTTPException, UploadFile, File
10
  from pydantic import BaseModel
11
+ from typing import Optional
12
  import chromadb
13
  from chromadb.config import Settings
14
  from sentence_transformers import SentenceTransformer
 
15
 
16
+ # Import from the true agent
17
  from agent_langchain import (
18
+ process_with_agent,
19
+ get_conversation_history,
20
+ classify_ticket,
21
  call_routing,
22
  get_kb_collection,
23
+ encoder,
24
+ conversations
25
  )
26
 
27
+ app = FastAPI(title="Smart Helpdesk AI Agent")
28
 
 
29
  # Request Models
 
30
  class TicketRequest(BaseModel):
31
  text: str
32
+ conversation_id: Optional[str] = None
33
+ user_email: Optional[str] = None
34
 
35
+ # Persistent Chroma settings
 
 
36
  CHROMA_PATH = "/tmp/chroma"
37
  COLLECTION_NAME = "knowledge_base"
38
 
 
41
  # -------------------------------
42
  @app.post("/setup")
43
  async def setup_kb(kb_file: UploadFile = File(...)):
44
+ """Upload and index knowledge base."""
 
 
 
45
  try:
 
46
  content_bytes = await kb_file.read()
47
  data = json.loads(content_bytes)
48
 
 
51
 
52
  print(f"📘 Loaded {len(data)} items from {kb_file.filename}")
53
 
 
54
  chroma_client = chromadb.PersistentClient(
55
  path=CHROMA_PATH,
56
+ settings=Settings(anonymized_telemetry=False, allow_reset=True)
 
 
 
57
  )
58
  collection = chroma_client.get_or_create_collection(COLLECTION_NAME)
59
 
 
60
  if collection.count() > 0:
61
  print(f"🧹 Clearing {collection.count()} existing records...")
62
  collection.delete(ids=collection.get()['ids'])
63
 
 
64
  texts, ids, metadatas = [], [], []
65
  for i, item in enumerate(data):
66
+ text = item.get("answer") or item.get("text") or item.get("content") or ""
 
 
 
 
67
  item_id = item.get("id") or str(i)
68
  category = item.get("category", "")
69
 
 
71
  print(f"⚠️ Skipping item {i} - no text content")
72
  continue
73
 
74
+ combined_text = f"Category: {category}. {text}" if category else text
 
 
 
 
 
75
  texts.append(combined_text)
76
  ids.append(str(item_id))
77
+ metadatas.append({"id": str(item_id), "category": category, "original_index": i})
 
 
 
 
78
 
79
  if not texts:
80
  raise HTTPException(status_code=400, detail="No valid text content found in JSON.")
81
 
 
82
  print("🧠 Generating embeddings...")
83
  embeddings = encoder.encode(texts, show_progress_bar=True).tolist()
84
 
 
85
  print("💾 Adding to ChromaDB...")
86
+ collection.add(ids=ids, embeddings=embeddings, documents=texts, metadatas=metadatas)
 
 
 
 
 
87
 
88
+ # Update global reference
89
  import agent_langchain
90
  agent_langchain.kb_collection = collection
91
 
92
+ print(f"✅ Successfully added {collection.count()} records")
93
+ return {"message": "Knowledge base initialized", "count": collection.count()}
94
 
95
  except json.JSONDecodeError:
96
  raise HTTPException(status_code=400, detail="Invalid JSON file.")
 
100
  raise HTTPException(status_code=500, detail=f"Setup failed: {str(e)}")
101
 
102
  # -------------------------------
103
+ # MAIN ORCHESTRATE ENDPOINT (True Agent)
104
+ # -------------------------------
105
@app.post("/orchestrate")
async def orchestrate_endpoint(ticket: TicketRequest):
    """Run one conversation turn through the agent and relay its result.

    The ticket text and the optional conversation id are handed to
    ``process_with_agent``; selected fields of the returned mapping are
    echoed back together with static usage hints.

    Args:
        ticket: Request body carrying ``text`` and an optional
            ``conversation_id``.

    Returns:
        A dict with ``conversation_id``, ``response``, ``status``,
        ``message_count``, ``can_continue`` (defaults to True when the
        agent result has no such key) and ``instructions``.

    Raises:
        HTTPException: 500 when the agent call, or reading a required key
            from its result, raises any exception.
    """
    usage_hints = {
        "continue_conversation": "Send another request with the same conversation_id",
        "new_ticket": "Send a request without conversation_id",
    }

    try:
        outcome = process_with_agent(
            user_message=ticket.text,
            conversation_id=ticket.conversation_id,
        )
        # Built inside the try so a missing key is reported as a 500 too.
        payload = {
            "conversation_id": outcome["conversation_id"],
            "response": outcome["response"],
            "status": outcome["status"],
            "message_count": outcome["message_count"],
            "can_continue": outcome.get("can_continue", True),
            "instructions": usage_hints,
        }
    except Exception as e:
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"Agent failed: {str(e)}")

    return payload
136
+
137
+ # -------------------------------
138
+ # Get Conversation History
139
  # -------------------------------
140
@app.get("/conversation/{conversation_id}")
async def get_conversation(conversation_id: str):
    """Return the stored history of one conversation.

    Args:
        conversation_id: Identifier taken from the URL path.

    Returns:
        A dict with the id, the stored ``messages``, ``created_at`` and the
        number of messages.

    Raises:
        HTTPException: 404 when ``get_conversation_history`` yields a falsy
            value for the given id.
    """
    record = get_conversation_history(conversation_id)
    if record:
        history = record["messages"]
        return {
            "conversation_id": conversation_id,
            "messages": history,
            "created_at": record["created_at"],
            "message_count": len(history),
        }

    raise HTTPException(status_code=404, detail="Conversation not found")
153
 
154
+ # -------------------------------
155
+ # Individual Tool Endpoints (for testing)
156
+ # -------------------------------
157
  @app.post("/classify")
158
  async def classify_endpoint(ticket: TicketRequest):
159
+ """Test classification only."""
160
  classification = classify_ticket(ticket.text)
161
  return {"classification": classification}
162
 
163
  @app.post("/route")
164
  async def route_endpoint(ticket: TicketRequest):
165
+ """Test routing only."""
166
  department = call_routing(ticket.text)
167
  return {"department": department}
168
 
169
  @app.post("/kb_query")
170
  async def kb_query_endpoint(ticket: TicketRequest):
171
+ """Test KB query only."""
172
  collection = get_kb_collection()
173
+ if not collection or collection.count() == 0:
 
174
  raise HTTPException(status_code=400, detail="KB not set up. Call /setup first.")
175
 
176
  try:
 
 
 
 
 
 
177
  query_embedding = encoder.encode([ticket.text])[0].tolist()
 
 
178
  result = collection.query(
179
  query_embeddings=[query_embedding],
180
  n_results=1,
 
184
  if not result or not result.get('documents') or len(result['documents'][0]) == 0:
185
  return {"answer": "No relevant KB found.", "confidence": 0.0}
186
 
 
187
  best_doc = result['documents'][0][0]
188
  best_distance = result['distances'][0][0] if result.get('distances') else 1.0
 
 
 
189
  confidence = max(0.0, 1.0 - (best_distance / 2.0))
190
 
191
+ return {"answer": best_doc, "confidence": round(float(confidence), 3)}
 
 
 
192
 
193
  except Exception as e:
194
  import traceback
195
  traceback.print_exc()
196
  raise HTTPException(status_code=500, detail=f"KB query failed: {str(e)}")
197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  # -------------------------------
199
  # Health Check
200
  # -------------------------------
 
207
  return {
208
  "status": "ok",
209
  "kb_status": kb_status,
210
+ "kb_records": kb_count,
211
+ "active_conversations": len(conversations)
212
  }