Sathvik-kota committed
Commit 14e52d0 · verified · 1 Parent(s): 3f9426c

Upload folder using huggingface_hub

Files changed (8)
  1. Dockerfile +36 -0
  2. README.md +1 -10
  3. app.py +179 -0
  4. async_microservice.py +173 -0
  5. requirements.txt +11 -0
  6. start.sh +25 -0
  7. sync_async_routing_API.py +58 -0
  8. sync_path_microservice.py +136 -0
Dockerfile ADDED
@@ -0,0 +1,36 @@
+ # Use a standard Python 3.11 slim image
+ FROM python:3.11-slim
+
+ # Set up a new user named "user" with user ID 1000 (required by HF Spaces)
+ RUN useradd -m -u 1000 user
+
+ # Switch to the "user" user
+ USER user
+
+ # Point HOME at the new user and put its local bin directory on the PATH
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # Work out of an app directory inside the user's home
+ WORKDIR $HOME/app
+
+ # Copy the requirements file first to leverage Docker layer caching
+ # Use --chown=user to ensure proper permissions
+ COPY --chown=user requirements.txt .
+
+ # Install all Python dependencies
+ # (torch and sentence-transformers must be listed in requirements.txt)
+ RUN pip install --no-cache-dir --upgrade pip && \
+     pip install --no-cache-dir -r requirements.txt
+
+ # Copy all project files into the container with proper ownership
+ COPY --chown=user . .
+
+ # Make the startup script executable
+ RUN chmod +x ./start.sh
+
+ # Tell Hugging Face that the app listens on port 8501
+ EXPOSE 8501
+
+ # Run the startup script when the container starts
+ CMD ["./start.sh"]
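To sanity-check this Dockerfile outside of Spaces, a local build-and-run along the following lines should work. This is a minimal sketch: the `banking-ticket-proto` tag and the placeholder key are illustrative, not part of the repo.

```bash
# Build the image from the repo root (the tag name is arbitrary)
docker build -t banking-ticket-proto .

# Run it, supplying the Gemini key the services read from the environment
# and publishing the Streamlit port the Dockerfile EXPOSEs
docker run --rm -p 8501:8501 -e GOOGLE_API_KEY="your-key-here" banking-ticket-proto
```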
README.md CHANGED
@@ -1,10 +1 @@
- ---
- title: Proto
- emoji: ⚡
- colorFrom: blue
- colorTo: indigo
- sdk: docker
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Banking_support_ticket-system_prototype
app.py ADDED
@@ -0,0 +1,179 @@
+ import streamlit as st
+ import requests
+ import time
+ import json
+
+ # Use the full browser width so the two columns have room
+ st.set_page_config(layout="wide")
+
+ # ---------- Streamlit Layout ----------
+ st.title("Banking Support Ticket System")
+ st.subheader("Submit a Ticket")
+
+ # Two columns: the form on the left, results on the right
+ col1, col2 = st.columns(2)
+
+ # --- Column 1: The Form ---
+ with col1:
+     with st.form(key="ticket_form"):
+         channel = st.selectbox("Channel", ["Email", "Chat", "Phone"])
+
+         # The labels make the routing behaviour explicit to the user
+         severity_option = st.selectbox(
+             "Severity (Determines Processing Path)",
+             [
+                 "High (Sync - service)",
+                 "Medium (Async - service)",
+                 "Low (Async - service)"
+             ]
+         )
+
+         summary = st.text_area("Summary", "Example: My credit card payment is not going through.")
+         submit_button = st.form_submit_button(label="Submit Ticket")
+
+ # Map the user-friendly option to the API value
+ severity_mapping = {
+     "High (Sync - service)": "High",
+     "Medium (Async - service)": "Medium",
+     "Low (Async - service)": "Low"
+ }
+ severity = severity_mapping[severity_option]
+
+ # ---------- API Call ----------
+ API_URL = "http://localhost:8000/ticket"      # Orchestrator
+ RESULT_URL = "http://localhost:8000/result"   # Orchestrator's result endpoint
+
+ # --- Column 2: The Results ---
+ with col2:
+     if submit_button:
+         ticket_data = {
+             "channel": channel,
+             "severity": severity,
+             "summary": summary
+         }
+
+         start_time = time.time()  # Start round-trip timer
+
+         # Define the API call logic as a reusable function
+         def make_api_call():
+             try:
+                 response = requests.post(API_URL, json=ticket_data)
+
+                 if response.status_code == 200:
+                     try:
+                         res = response.json()
+                     except json.JSONDecodeError:
+                         st.error(f"Error: Could not decode JSON response from orchestrator. Response: {response.text}")
+                         res = None
+
+                     if res:
+                         # Async ticket: the orchestrator queued it
+                         if res.get("status") == "queued":
+                             st.success(f"Your ticket has been submitted to our support team (Job ID: {res['ticket_id']})")
+                             st.info("Our team is reviewing the issue and will get back to you as soon as the problem is traced. The results will appear here automatically.")
+                             result_placeholder = st.empty()
+
+                             # Poll the API until the result is ready
+                             while True:
+                                 result_resp = requests.get(f"{RESULT_URL}/{res['ticket_id']}")
+
+                                 if result_resp.status_code == 200:
+                                     try:
+                                         result_data = result_resp.json()
+                                     except json.JSONDecodeError:
+                                         result_placeholder.error(f"Error: Could not decode JSON response from result endpoint. Response: {result_resp.text}")
+                                         break
+
+                                     if result_data.get("status") == "completed":
+                                         result_placeholder.empty()  # Clear the "processing" message
+                                         st.success("Support Team Response:")
+
+                                         result = result_data.get('result', {})
+                                         st.write(f"**Decision:** {result.get('decision', 'N/A')}")
+                                         st.write(f"**Reason:** {result.get('reason', 'N/A')}")
+                                         st.write("**Next Actions:**")
+                                         for step in result.get('next_actions', []):
+                                             st.write(f"- {step}")
+
+                                         # Display processing time
+                                         processing_time = result.get("processing_time")
+                                         if processing_time:
+                                             st.write(f"**AI Processing Time:** {processing_time:.2f} seconds")
+
+                                         retrieved_context = result.get("retrieved_context")
+                                         if retrieved_context:
+                                             with st.expander("Show RAG Context"):
+                                                 st.text(retrieved_context)
+
+                                         break  # Exit polling loop
+
+                                     elif result_data.get("status") == "processing":
+                                         result_placeholder.info("Our support team is actively reviewing your ticket now...")
+
+                                     elif result_data.get("status") == "queued":
+                                         result_placeholder.info("Your ticket is in the queue. Our team will review it shortly.")
+
+                                     elif result_data.get("status") == "error":
+                                         result_placeholder.error(f"Error processing ticket: {result_data.get('detail')}")
+                                         break
+
+                                     else:
+                                         result_placeholder.info(f"Waiting for result... (Status: {result_data.get('status')})")
+
+                                 elif result_resp.status_code == 404:
+                                     st.error("Error: Result endpoint not found (404). Check Orchestrator (`sync_async_routing_API.py`).")
+                                     break
+                                 else:
+                                     st.error(f"Error polling for result: {result_resp.status_code} - {result_resp.text}")
+                                     break
+
+                                 time.sleep(2)
+
+                         # Sync ticket: the decision came back in the same response
+                         elif res.get("decision"):
+                             total_time = time.time() - start_time
+                             st.success("Received Real-Time Support Response:")
+
+                             st.write(f"**Decision:** {res['decision']}")
+                             st.write(f"**Reason:** {res['reason']}")
+                             st.write("**Next Actions:**")
+                             for step in res['next_actions']:
+                                 st.write(f"- {step}")
+
+                             # Display processing time
+                             processing_time = res.get("processing_time")
+                             if processing_time:
+                                 st.write(f"**AI Processing Time:** {processing_time:.2f} seconds")
+
+                             st.write(f"**Total round-trip time:** {total_time:.2f} seconds")
+
+                             retrieved_context = res.get("retrieved_context")
+                             if retrieved_context:
+                                 with st.expander("Show RAG Context"):
+                                     st.text(retrieved_context)
+
+                         else:
+                             st.error(f"Error: Unknown response from orchestrator: {res}")
+
+                 else:
+                     st.error(f"Error submitting ticket: {response.status_code} - {response.text}")
+
+             except requests.exceptions.ConnectionError:
+                 st.error("API connection failed. Is the Orchestrator (port 8000) running?")
+             except Exception as e:
+                 st.error(f"An unexpected error occurred: {e}")
+
+         # Only the synchronous (High severity) path gets a blocking spinner;
+         # async tickets show their "queued" status instantly instead.
+         if severity == "High":
+             with st.spinner("Contacting support... Please wait."):
+                 make_api_call()
+         else:
+             make_api_call()
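The request the form sends can be reproduced without the UI. A high-severity ticket should come back with the decision inline in the same response; a quick check against the orchestrator (endpoint and payload fields taken from the code above, summary text illustrative):

```bash
curl -s -X POST http://localhost:8000/ticket \
  -H "Content-Type: application/json" \
  -d '{"channel": "Email", "severity": "High", "summary": "My credit card payment is not going through."}'
```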
async_microservice.py ADDED
@@ -0,0 +1,173 @@
+ import google.generativeai as genai
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ import os
+ import json
+ import asyncio
+ import time
+ from sentence_transformers import SentenceTransformer, util
+ import torch
+ from uuid import uuid4
+
+ # Configure the Gemini client
+ try:
+     genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
+     print("Google Gemini client initialized successfully.")
+ except Exception as e:
+     print(f"Error initializing Google Gemini client: {e}")
+     genai = None
+
+ # RAG model + in-memory store
+ try:
+     embed_model = SentenceTransformer("all-MiniLM-L6-v2")
+     memory_store = []  # List of {"text": "...", "embedding": tensor, "response": "..."}
+     print("SentenceTransformer model loaded successfully.")
+ except Exception as e:
+     print(f"Error loading SentenceTransformer model: {e}")
+     embed_model = None
+
+ app = FastAPI(title="Async Ticket Service (Gemini RAG)")
+
+ ticket_queue = asyncio.Queue()
+ results_store = {}
+
+ class Ticket(BaseModel):
+     channel: str
+     severity: str
+     summary: str
+
+ # RAG helper functions
+ def add_to_memory(ticket_text, response_text):
+     if not embed_model:
+         print("Embed model not loaded, skipping memory add.")
+         return
+     try:
+         embedding = embed_model.encode(ticket_text, convert_to_tensor=True)
+         memory_store.append({"text": ticket_text, "embedding": embedding, "response": response_text})
+         print(f"Added to memory. New memory size: {len(memory_store)}")
+     except Exception as e:
+         print(f"Error adding to memory: {e}")
+
+ def retrieve_context(query_text, top_k=2):
+     if not memory_store or not embed_model:
+         print("No memory or embed model, returning empty context.")
+         return ""
+     try:
+         query_emb = embed_model.encode(query_text, convert_to_tensor=True)
+         sims = [util.cos_sim(query_emb, item["embedding"]).item() for item in memory_store]
+
+         # No similarity threshold: always take the top_k matches
+         top_indices = sorted(range(len(sims)), key=lambda i: sims[i], reverse=True)[:top_k]
+
+         if not top_indices or sims[top_indices[0]] == 0.0:
+             print("No context found (memory store was empty or no similarity).")
+             return ""
+
+         print(f"Top similarity scores found: {[sims[i] for i in top_indices]}")
+         context = "\n\n".join([f"Past Ticket: {memory_store[i]['text']}\nResponse: {memory_store[i]['response']}" for i in top_indices])
+         print(f"Retrieved context: {context}")
+         return context
+     except Exception as e:
+         print(f"Error retrieving context: {e}")
+         return ""
+
+ def create_rag_prompt(ticket: Ticket):
+     """Creates the Gemini prompt and returns the prompt AND the context."""
+     context = retrieve_context(ticket.summary)
+     ticket_text = f"Channel: {ticket.channel}, Severity: {ticket.severity}, Summary: {ticket.summary}"
+
+     prompt = f"""
+ You are an expert banking support assistant.
+
+ Use the following past cases as context if relevant:
+ ---
+ {context if context else "No relevant past cases found."}
+ ---
+
+ Now classify this new ticket into:
+ 1. AI Code Patch
+ 2. Vibe Workflow
+ 3. General / Non-Issue — casual greetings, unclear, or unrelated messages.
+
+ Return a single, valid JSON object with 'decision', 'reason', and 'next_actions' (as a list of strings).
+
+ New Ticket:
+ {ticket_text}
+ """
+     return prompt, (context if context else "No relevant past cases found.")
+
+ # Worker: pulls tickets off the queue and classifies them
+ async def worker(worker_id: int):
+     print(f"Worker {worker_id} starting...")
+     if not genai or not embed_model:
+         print(f"Worker {worker_id}: AI services not initialized. Worker stopping.")
+         return
+
+     model = genai.GenerativeModel('gemini-2.5-flash')
+
+     while True:
+         try:
+             ticket_id, ticket = await ticket_queue.get()
+             print(f"Worker {worker_id} processing ticket {ticket_id}: {ticket.summary}")
+
+             results_store[ticket_id] = {"status": "processing"}
+
+             try:
+                 prompt, retrieved_context = create_rag_prompt(ticket)
+
+                 start_time = time.perf_counter()
+
+                 # Run the blocking generate_content call in a separate thread
+                 response = await asyncio.to_thread(
+                     model.generate_content,
+                     prompt,
+                     generation_config=genai.types.GenerationConfig(
+                         response_mime_type="application/json",
+                     )
+                 )
+
+                 processing_time = time.perf_counter() - start_time
+                 print(f"Worker {worker_id} Gemini processing time: {processing_time:.2f}s")
+
+                 result_json = json.loads(response.text)
+                 result_json["processing_time"] = processing_time
+                 result_json["retrieved_context"] = retrieved_context
+
+                 results_store[ticket_id] = {"status": "completed", "result": result_json}
+
+                 # Run the blocking add_to_memory in a thread as well
+                 await asyncio.to_thread(add_to_memory, ticket.summary, response.text)
+
+             except Exception as e:
+                 error_msg = str(e)
+                 print(f"Worker {worker_id} error processing {ticket_id}: {error_msg}")
+                 results_store[ticket_id] = {"status": "error", "detail": error_msg}
+
+             finally:
+                 ticket_queue.task_done()
+
+         except Exception as e:
+             print(f"Worker {worker_id} critical error: {e}")
+             await asyncio.sleep(1)
+
+ @app.on_event("startup")
+ async def startup_event():
+     print("Starting 3 workers...")
+     for i in range(3):
+         asyncio.create_task(worker(i))
+
+ @app.post("/async_ticket")
+ async def async_ticket(ticket: Ticket):
+     ticket_id = str(uuid4())
+     # Record the "queued" status before enqueueing so a fast worker's
+     # "processing"/"completed" update cannot be overwritten afterwards
+     results_store[ticket_id] = {"status": "queued"}
+     await ticket_queue.put((ticket_id, ticket))
+     return {"ticket_id": ticket_id, "status": "queued"}
+
+ @app.get("/result/{ticket_id}")
+ async def get_result(ticket_id: str):
+     result = results_store.get(ticket_id, {"status": "pending"})
+     return result
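Hitting this service directly (bypassing the orchestrator) exercises the full queue-then-poll lifecycle. A rough sketch, assuming the service is up on port 8002 and `python3` is available for the JSON parsing; the summary text is illustrative:

```bash
# Submit straight to the async service and capture the ticket_id
TICKET_ID=$(curl -s -X POST http://localhost:8002/async_ticket \
  -H "Content-Type: application/json" \
  -d '{"channel": "Chat", "severity": "Low", "summary": "Statement PDF will not download."}' \
  | python3 -c 'import json, sys; print(json.load(sys.stdin)["ticket_id"])')

# Poll /result every 2 seconds, mirroring the Streamlit loop,
# until a worker marks the job completed (or errored)
while true; do
  BODY=$(curl -s "http://localhost:8002/result/$TICKET_ID")
  echo "$BODY"
  case "$BODY" in *'"completed"'*|*'"error"'*) break ;; esac
  sleep 2
done
```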
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ fastapi
+ uvicorn
+ requests
+ pydantic
+ python-dotenv
+ google-generativeai
+ streamlit
+ # asyncio is part of the Python standard library; installing the PyPI package of the same name is unnecessary
+ pyngrok
+ sentence-transformers
+ torch
start.sh ADDED
@@ -0,0 +1,25 @@
+ #!/bin/bash
+
+ # Pass the GOOGLE_API_KEY HF Secret through to the services below
+ # (it is already in the environment; the re-export makes the dependency explicit)
+ export GOOGLE_API_KEY=$GOOGLE_API_KEY
+
+ # Start the 3 backend FastAPI services in the background
+ # (the trailing '&' runs each one as a background process)
+ echo "Starting Orchestrator on port 8000..."
+ uvicorn sync_async_routing_API:app --host 0.0.0.0 --port 8000 &
+
+ echo "Starting Sync Service on port 8001..."
+ # Module name matches the file sync_path_microservice.py
+ uvicorn sync_path_microservice:app --host 0.0.0.0 --port 8001 &
+
+ echo "Starting Async Service on port 8002..."
+ # Module name matches the file async_microservice.py
+ uvicorn async_microservice:app --host 0.0.0.0 --port 8002 &
+
+ # Start the Streamlit app in the foreground.
+ # This is the main process that keeps the container running;
+ # Hugging Face routes traffic to its port (8501).
+ echo "Starting Streamlit UI on port 8501..."
+ streamlit run app.py --server.port 8501 --server.headless true --server.address 0.0.0.0
+
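After start.sh has launched everything, one quick way to confirm the three FastAPI services are listening is to hit their auto-generated /docs pages (FastAPI serves these by default unless disabled, which none of the services here do). A minimal check:

```bash
# Expect HTTP 200 from each backend's interactive docs page
for port in 8000 8001 8002; do
  code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$port/docs")
  echo "port $port -> HTTP $code"
done
```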
sync_async_routing_API.py ADDED
@@ -0,0 +1,58 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ import requests
+
+ app = FastAPI(title="Ticket Routing Service (Orchestrator)")
+
+ # ---------- Ticket Model ----------
+ class Ticket(BaseModel):
+     channel: str
+     severity: str
+     summary: str
+
+ # ---------- Service URLs ----------
+ SYNC_SERVICE_URL = "http://localhost:8001/sync_ticket"    # Sync service API
+ ASYNC_SERVICE_URL = "http://localhost:8002/async_ticket"  # Async service API
+ ASYNC_RESULT_URL = "http://localhost:8002/result"         # Async service result API
+
+ # ---------- Routing API ----------
+ @app.post("/ticket")
+ def route_ticket(ticket: Ticket):
+     """
+     Receives a ticket from the frontend (Streamlit) and routes it
+     to the Sync or Async service based on severity.
+     """
+     try:
+         if ticket.severity.lower() == "high":
+             # Route to the Sync Service
+             response = requests.post(SYNC_SERVICE_URL, json=ticket.dict())
+         else:
+             # Route to the Async Service
+             response = requests.post(ASYNC_SERVICE_URL, json=ticket.dict())
+
+         response.raise_for_status()  # Raise an exception for 4xx/5xx errors
+         return response.json()
+
+     except requests.exceptions.RequestException as e:
+         print(f"Error connecting to microservice: {e}")
+         raise HTTPException(status_code=503, detail="Service unavailable")
+
+ # --- Result proxy endpoint (added so the frontend's polling no longer 404s) ---
+ @app.get("/result/{ticket_id}")
+ def get_async_result(ticket_id: str):
+     """
+     Forwards the result request from the frontend to the async microservice.
+     """
+     try:
+         # Construct the full URL to the async service's result endpoint
+         url = f"{ASYNC_RESULT_URL}/{ticket_id}"
+
+         response = requests.get(url)
+         response.raise_for_status()  # Check for errors from the async service
+
+         return response.json()
+
+     except requests.exceptions.RequestException as e:
+         print(f"Error connecting to async result service: {e}")
+         # The async service could not be reached
+         raise HTTPException(status_code=503, detail="Async service unavailable")
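Per the routing logic above, anything below High severity is handed to the async service, so the orchestrator answers with a job handle instead of a decision, and the frontend polls the orchestrator's /result proxy. A sketch of that flow (summary text illustrative, `<ticket_id>` to be substituted from the first response):

```bash
# A Low-severity ticket is queued, not answered inline;
# expected response shape: {"ticket_id": "<uuid>", "status": "queued"}
curl -s -X POST http://localhost:8000/ticket \
  -H "Content-Type: application/json" \
  -d '{"channel": "Phone", "severity": "Low", "summary": "Mobile app shows an outdated balance."}'

# Later, fetch the result through the orchestrator's proxy endpoint
curl -s http://localhost:8000/result/<ticket_id>
```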
sync_path_microservice.py ADDED
@@ -0,0 +1,136 @@
+ import google.generativeai as genai
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ import os
+ import json
+ import time
+ from sentence_transformers import SentenceTransformer, util
+ import torch
+
+ # Configure the Gemini client
+ try:
+     genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
+     print("Google Gemini client initialized successfully.")
+ except Exception as e:
+     print(f"Error initializing Google Gemini client: {e}")
+     genai = None
+
+ # RAG model + in-memory store
+ try:
+     embed_model = SentenceTransformer("all-MiniLM-L6-v2")
+     memory_store = []  # List of {"text": "...", "embedding": tensor, "response": "..."}
+     print("SentenceTransformer model loaded successfully.")
+ except Exception as e:
+     print(f"Error loading SentenceTransformer model: {e}")
+     embed_model = None
+
+ app = FastAPI(title="Sync Ticket Service (Gemini RAG)")
+
+ class Ticket(BaseModel):
+     channel: str
+     severity: str
+     summary: str
+
+ # RAG helper functions
+ def add_to_memory(ticket_text, response_text):
+     if not embed_model:
+         print("Embed model not loaded, skipping memory add.")
+         return
+     try:
+         embedding = embed_model.encode(ticket_text, convert_to_tensor=True)
+         memory_store.append({"text": ticket_text, "embedding": embedding, "response": response_text})
+         print(f"Added to memory. New memory size: {len(memory_store)}")
+     except Exception as e:
+         print(f"Error adding to memory: {e}")
+
+ def retrieve_context(query_text, top_k=2):
+     if not memory_store or not embed_model:
+         print("No memory or embed model, returning empty context.")
+         return ""
+     try:
+         query_emb = embed_model.encode(query_text, convert_to_tensor=True)
+         sims = [util.cos_sim(query_emb, item["embedding"]).item() for item in memory_store]
+
+         # No similarity threshold: always take the top_k matches, so the
+         # prototype always shows what it is retrieving
+         top_indices = sorted(range(len(sims)), key=lambda i: sims[i], reverse=True)[:top_k]
+
+         if not top_indices:
+             print("No context found (memory store was empty).")
+             return ""
+
+         print(f"Top similarity scores found: {[sims[i] for i in top_indices]}")
+         context = "\n\n".join([f"Past Ticket: {memory_store[i]['text']}\nResponse: {memory_store[i]['response']}" for i in top_indices])
+         print(f"Retrieved context: {context}")
+         return context
+     except Exception as e:
+         print(f"Error retrieving context: {e}")
+         return ""
+
+ def create_rag_prompt(ticket: Ticket):
+     """Creates the Gemini prompt and returns the prompt AND the context."""
+     context = retrieve_context(ticket.summary)
+     ticket_text = f"Channel: {ticket.channel}, Severity: {ticket.severity}, Summary: {ticket.summary}"
+
+     prompt = f"""
+ You are an expert banking support assistant.
+
+ Use the following past cases as context if relevant:
+ ---
+ {context if context else "No relevant past cases found."}
+ ---
+
+ Now classify this new ticket into:
+ 1. AI Code Patch
+ 2. Vibe Workflow
+ 3. General / Non-Issue — casual greetings, unclear, or unrelated messages.
+
+ Return a single, valid JSON object with 'decision', 'reason', and 'next_actions' (as a list of strings).
+
+ New Ticket:
+ {ticket_text}
+ """
+     return prompt, (context if context else "No relevant past cases found.")
+
+ @app.post("/sync_ticket")
+ def sync_ticket(ticket: Ticket):
+     if not genai or not embed_model:
+         raise HTTPException(status_code=503, detail="AI service not initialized")
+
+     print(f"Received sync ticket (GEMINI RAG MODE): {ticket.summary}")
+
+     try:
+         prompt, retrieved_context = create_rag_prompt(ticket)
+         print("--- Sending prompt to Gemini for sync ticket ---")
+
+         model = genai.GenerativeModel('gemini-2.5-flash')
+
+         start_time = time.perf_counter()
+
+         response = model.generate_content(
+             prompt,
+             generation_config=genai.types.GenerationConfig(
+                 response_mime_type="application/json",
+             )
+         )
+
+         processing_time = time.perf_counter() - start_time
+         print(f"Gemini sync processing time: {processing_time:.2f}s")
+
+         result_json = json.loads(response.text)
+         result_json["processing_time"] = processing_time
+         result_json["retrieved_context"] = retrieved_context
+
+         add_to_memory(ticket.summary, response.text)
+
+         return result_json
+
+     except Exception as e:
+         error_msg = str(e)
+         print(f"!!! Unexpected Error in sync_ticket (Gemini): {error_msg}")
+         raise HTTPException(status_code=500, detail=error_msg)
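Calling the sync service directly on port 8001 should return the model's classification JSON augmented with the `processing_time` and `retrieved_context` keys the handler adds. A sample invocation (summary text illustrative):

```bash
curl -s -X POST http://localhost:8001/sync_ticket \
  -H "Content-Type: application/json" \
  -d '{"channel": "Phone", "severity": "High", "summary": "Duplicate debit on my savings account."}'
```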