Sathvik-kota committed
Commit 14e52d0 · verified · 1 Parent(s): 3f9426c

Upload folder using huggingface_hub

Files changed (8)
  1. Dockerfile +36 -0
  2. README.md +1 -10
  3. app.py +179 -0
  4. async_microservice.py +173 -0
  5. requirements.txt +11 -0
  6. start.sh +25 -0
  7. sync_async_routing_API.py +58 -0
  8. sync_path_microservice.py +136 -0
Dockerfile ADDED
@@ -0,0 +1,36 @@
+ # Use a standard Python 3.11 slim image
+ FROM python:3.11-slim
+
+ # Set up a new user named "user" with user ID 1000 (required by HF Spaces)
+ RUN useradd -m -u 1000 user
+
+ # Switch to the "user" user
+ USER user
+
+ # Point HOME at the new user and put its local bin directory on the PATH
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # Work out of an app directory inside the user's home
+ WORKDIR $HOME/app
+
+ # Copy the requirements file first to leverage Docker layer caching
+ # Use --chown=user to ensure proper permissions
+ COPY --chown=user requirements.txt .
+
+ # Install all Python dependencies
+ # (torch and sentence-transformers must be listed in requirements.txt)
+ RUN pip install --no-cache-dir --upgrade pip && \
+     pip install --no-cache-dir -r requirements.txt
+
+ # Copy all project files into the container with proper ownership
+ COPY --chown=user . .
+
+ # Make the startup script executable
+ RUN chmod +x ./start.sh
+
+ # Tell Hugging Face that the app listens on port 8501
+ EXPOSE 8501
+
+ # Run the startup script when the container starts
+ CMD ["./start.sh"]
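To sanity-check this Dockerfile outside of Spaces, a local build-and-run along the following lines should work. This is a minimal sketch: the `banking-ticket-proto` tag and the placeholder key are illustrative, not part of the repo.

```bash
# Build the image from the repo root (the tag name is arbitrary)
docker build -t banking-ticket-proto .

# Run it, supplying the Gemini key the services read from the environment
# and publishing the Streamlit port the Dockerfile EXPOSEs
docker run --rm -p 8501:8501 -e GOOGLE_API_KEY="your-key-here" banking-ticket-proto
```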
README.md CHANGED
@@ -1,10 +1 @@
- ---
- title: Proto
- emoji: ⚡
- colorFrom: blue
- colorTo: indigo
- sdk: docker
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Banking_support_ticket-system_prototype
app.py ADDED
@@ -0,0 +1,179 @@
+ import streamlit as st
+ import requests
+ import time
+ import json
+
+ # Use the full browser width so the two columns have room
+ st.set_page_config(layout="wide")
+
+ # ---------- Streamlit Layout ----------
+ st.title("Banking Support Ticket System")
+ st.subheader("Submit a Ticket")
+
+ # Two columns: the form on the left, results on the right
+ col1, col2 = st.columns(2)
+
+ # --- Column 1: The Form ---
+ with col1:
+     with st.form(key="ticket_form"):
+         channel = st.selectbox("Channel", ["Email", "Chat", "Phone"])
+
+         # The labels make the routing behaviour explicit to the user
+         severity_option = st.selectbox(
+             "Severity (Determines Processing Path)",
+             [
+                 "High (Sync - service)",
+                 "Medium (Async - service)",
+                 "Low (Async - service)"
+             ]
+         )
+
+         summary = st.text_area("Summary", "Example: My credit card payment is not going through.")
+         submit_button = st.form_submit_button(label="Submit Ticket")
+
+ # Map the user-friendly option to the API value
+ severity_mapping = {
+     "High (Sync - service)": "High",
+     "Medium (Async - service)": "Medium",
+     "Low (Async - service)": "Low"
+ }
+ severity = severity_mapping[severity_option]
+
+ # ---------- API Call ----------
+ API_URL = "http://localhost:8000/ticket"      # Orchestrator
+ RESULT_URL = "http://localhost:8000/result"   # Orchestrator's result endpoint
+
+ # --- Column 2: The Results ---
+ with col2:
+     if submit_button:
+         ticket_data = {
+             "channel": channel,
+             "severity": severity,
+             "summary": summary
+         }
+
+         start_time = time.time()  # Start round-trip timer
+
+         # Define the API call logic as a reusable function
+         def make_api_call():
+             try:
+                 response = requests.post(API_URL, json=ticket_data)
+
+                 if response.status_code == 200:
+                     try:
+                         res = response.json()
+                     except json.JSONDecodeError:
+                         st.error(f"Error: Could not decode JSON response from orchestrator. Response: {response.text}")
+                         res = None
+
+                     if res:
+                         # Async ticket: the orchestrator queued it
+                         if res.get("status") == "queued":
+                             st.success(f"Your ticket has been submitted to our support team (Job ID: {res['ticket_id']})")
+                             st.info("Our team is reviewing the issue and will get back to you as soon as the problem is traced. The results will appear here automatically.")
+                             result_placeholder = st.empty()
+
+                             # Poll the API until the result is ready
+                             while True:
+                                 result_resp = requests.get(f"{RESULT_URL}/{res['ticket_id']}")
+
+                                 if result_resp.status_code == 200:
+                                     try:
+                                         result_data = result_resp.json()
+                                     except json.JSONDecodeError:
+                                         result_placeholder.error(f"Error: Could not decode JSON response from result endpoint. Response: {result_resp.text}")
+                                         break
+
+                                     if result_data.get("status") == "completed":
+                                         result_placeholder.empty()  # Clear the "processing" message
+                                         st.success("Support Team Response:")
+
+                                         result = result_data.get('result', {})
+                                         st.write(f"**Decision:** {result.get('decision', 'N/A')}")
+                                         st.write(f"**Reason:** {result.get('reason', 'N/A')}")
+                                         st.write("**Next Actions:**")
+                                         for step in result.get('next_actions', []):
+                                             st.write(f"- {step}")
+
+                                         # Display processing time
+                                         processing_time = result.get("processing_time")
+                                         if processing_time:
+                                             st.write(f"**AI Processing Time:** {processing_time:.2f} seconds")
+
+                                         retrieved_context = result.get("retrieved_context")
+                                         if retrieved_context:
+                                             with st.expander("Show RAG Context"):
+                                                 st.text(retrieved_context)
+
+                                         break  # Exit polling loop
+
+                                     elif result_data.get("status") == "processing":
+                                         result_placeholder.info("Our support team is actively reviewing your ticket now...")
+
+                                     elif result_data.get("status") == "queued":
+                                         result_placeholder.info("Your ticket is in the queue. Our team will review it shortly.")
+
+                                     elif result_data.get("status") == "error":
+                                         result_placeholder.error(f"Error processing ticket: {result_data.get('detail')}")
+                                         break
+
+                                     else:
+                                         result_placeholder.info(f"Waiting for result... (Status: {result_data.get('status')})")
+
+                                 elif result_resp.status_code == 404:
+                                     st.error("Error: Result endpoint not found (404). Check Orchestrator (`sync_async_routing_API.py`).")
+                                     break
+                                 else:
+                                     st.error(f"Error polling for result: {result_resp.status_code} - {result_resp.text}")
+                                     break
+
+                                 time.sleep(2)
+
+                         # Sync ticket: the decision came back in the same response
+                         elif res.get("decision"):
+                             total_time = time.time() - start_time
+                             st.success("Received Real-Time Support Response:")
+
+                             st.write(f"**Decision:** {res['decision']}")
+                             st.write(f"**Reason:** {res['reason']}")
+                             st.write("**Next Actions:**")
+                             for step in res['next_actions']:
+                                 st.write(f"- {step}")
+
+                             # Display processing time
+                             processing_time = res.get("processing_time")
+                             if processing_time:
+                                 st.write(f"**AI Processing Time:** {processing_time:.2f} seconds")
+
+                             st.write(f"**Total round-trip time:** {total_time:.2f} seconds")
+
+                             retrieved_context = res.get("retrieved_context")
+                             if retrieved_context:
+                                 with st.expander("Show RAG Context"):
+                                     st.text(retrieved_context)
+
+                         else:
+                             st.error(f"Error: Unknown response from orchestrator: {res}")
+
+                 else:
+                     st.error(f"Error submitting ticket: {response.status_code} - {response.text}")
+
+             except requests.exceptions.ConnectionError:
+                 st.error("API connection failed. Is the Orchestrator (port 8000) running?")
+             except Exception as e:
+                 st.error(f"An unexpected error occurred: {e}")
+
+         # Only the synchronous (High severity) path gets a blocking spinner;
+         # async tickets show their "queued" status instantly instead.
+         if severity == "High":
+             with st.spinner("Contacting support... Please wait."):
+                 make_api_call()
+         else:
+             make_api_call()
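The request the form sends can be reproduced without the UI. A high-severity ticket should come back with the decision inline in the same response; a quick check against the orchestrator (endpoint and payload fields taken from the code above, summary text illustrative):

```bash
curl -s -X POST http://localhost:8000/ticket \
  -H "Content-Type: application/json" \
  -d '{"channel": "Email", "severity": "High", "summary": "My credit card payment is not going through."}'
```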
async_microservice.py ADDED
@@ -0,0 +1,173 @@
+ import google.generativeai as genai
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ import os
+ import json
+ import asyncio
+ import time
+ from sentence_transformers import SentenceTransformer, util
+ import torch
+ from uuid import uuid4
+
+ # Configure the Gemini client
+ try:
+     genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
+     print("Google Gemini client initialized successfully.")
+ except Exception as e:
+     print(f"Error initializing Google Gemini client: {e}")
+     genai = None
+
+ # RAG model + in-memory store
+ try:
+     embed_model = SentenceTransformer("all-MiniLM-L6-v2")
+     memory_store = []  # List of {"text": "...", "embedding": tensor, "response": "..."}
+     print("SentenceTransformer model loaded successfully.")
+ except Exception as e:
+     print(f"Error loading SentenceTransformer model: {e}")
+     embed_model = None
+
+ app = FastAPI(title="Async Ticket Service (Gemini RAG)")
+
+ ticket_queue = asyncio.Queue()
+ results_store = {}
+
+ class Ticket(BaseModel):
+     channel: str
+     severity: str
+     summary: str
+
+ # RAG helper functions
+ def add_to_memory(ticket_text, response_text):
+     if not embed_model:
+         print("Embed model not loaded, skipping memory add.")
+         return
+     try:
+         embedding = embed_model.encode(ticket_text, convert_to_tensor=True)
+         memory_store.append({"text": ticket_text, "embedding": embedding, "response": response_text})
+         print(f"Added to memory. New memory size: {len(memory_store)}")
+     except Exception as e:
+         print(f"Error adding to memory: {e}")
+
+ def retrieve_context(query_text, top_k=2):
+     if not memory_store or not embed_model:
+         print("No memory or embed model, returning empty context.")
+         return ""
+     try:
+         query_emb = embed_model.encode(query_text, convert_to_tensor=True)
+         sims = [util.cos_sim(query_emb, item["embedding"]).item() for item in memory_store]
+
+         # No similarity threshold: always take the top_k matches
+         top_indices = sorted(range(len(sims)), key=lambda i: sims[i], reverse=True)[:top_k]
+
+         if not top_indices or sims[top_indices[0]] == 0.0:
+             print("No context found (memory store was empty or no similarity).")
+             return ""
+
+         print(f"Top similarity scores found: {[sims[i] for i in top_indices]}")
+         context = "\n\n".join([f"Past Ticket: {memory_store[i]['text']}\nResponse: {memory_store[i]['response']}" for i in top_indices])
+         print(f"Retrieved context: {context}")
+         return context
+     except Exception as e:
+         print(f"Error retrieving context: {e}")
+         return ""
+
+ def create_rag_prompt(ticket: Ticket):
+     """Creates the Gemini prompt and returns the prompt AND the context."""
+     context = retrieve_context(ticket.summary)
+     ticket_text = f"Channel: {ticket.channel}, Severity: {ticket.severity}, Summary: {ticket.summary}"
+
+     prompt = f"""
+ You are an expert banking support assistant.
+
+ Use the following past cases as context if relevant:
+ ---
+ {context if context else "No relevant past cases found."}
+ ---
+
+ Now classify this new ticket into:
+ 1. AI Code Patch
+ 2. Vibe Workflow
+ 3. General / Non-Issue — casual greetings, unclear, or unrelated messages.
+
+ Return a single, valid JSON object with 'decision', 'reason', and 'next_actions' (as a list of strings).
+
+ New Ticket:
+ {ticket_text}
+ """
+     return prompt, (context if context else "No relevant past cases found.")
+
+ # Worker: pulls tickets off the queue and classifies them
+ async def worker(worker_id: int):
+     print(f"Worker {worker_id} starting...")
+     if not genai or not embed_model:
+         print(f"Worker {worker_id}: AI services not initialized. Worker stopping.")
+         return
+
+     model = genai.GenerativeModel('gemini-2.5-flash')
+
+     while True:
+         try:
+             ticket_id, ticket = await ticket_queue.get()
+             print(f"Worker {worker_id} processing ticket {ticket_id}: {ticket.summary}")
+
+             results_store[ticket_id] = {"status": "processing"}
+
+             try:
+                 prompt, retrieved_context = create_rag_prompt(ticket)
+
+                 start_time = time.perf_counter()
+
+                 # Run the blocking generate_content call in a separate thread
+                 response = await asyncio.to_thread(
+                     model.generate_content,
+                     prompt,
+                     generation_config=genai.types.GenerationConfig(
+                         response_mime_type="application/json",
+                     )
+                 )
+
+                 processing_time = time.perf_counter() - start_time
+                 print(f"Worker {worker_id} Gemini processing time: {processing_time:.2f}s")
+
+                 result_json = json.loads(response.text)
+                 result_json["processing_time"] = processing_time
+                 result_json["retrieved_context"] = retrieved_context
+
+                 results_store[ticket_id] = {"status": "completed", "result": result_json}
+
+                 # Run the blocking add_to_memory in a thread as well
+                 await asyncio.to_thread(add_to_memory, ticket.summary, response.text)
+
+             except Exception as e:
+                 error_msg = str(e)
+                 print(f"Worker {worker_id} error processing {ticket_id}: {error_msg}")
+                 results_store[ticket_id] = {"status": "error", "detail": error_msg}
+
+             finally:
+                 ticket_queue.task_done()
+
+         except Exception as e:
+             print(f"Worker {worker_id} critical error: {e}")
+             await asyncio.sleep(1)
+
+ @app.on_event("startup")
+ async def startup_event():
+     print("Starting 3 workers...")
+     for i in range(3):
+         asyncio.create_task(worker(i))
+
+ @app.post("/async_ticket")
+ async def async_ticket(ticket: Ticket):
+     ticket_id = str(uuid4())
+     # Record the "queued" status before enqueueing so a fast worker's
+     # "processing"/"completed" update cannot be overwritten afterwards
+     results_store[ticket_id] = {"status": "queued"}
+     await ticket_queue.put((ticket_id, ticket))
+     return {"ticket_id": ticket_id, "status": "queued"}
+
+ @app.get("/result/{ticket_id}")
+ async def get_result(ticket_id: str):
+     result = results_store.get(ticket_id, {"status": "pending"})
+     return result
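Hitting this service directly (bypassing the orchestrator) exercises the full queue-then-poll lifecycle. A rough sketch, assuming the service is up on port 8002 and `python3` is available for the JSON parsing; the summary text is illustrative:

```bash
# Submit straight to the async service and capture the ticket_id
TICKET_ID=$(curl -s -X POST http://localhost:8002/async_ticket \
  -H "Content-Type: application/json" \
  -d '{"channel": "Chat", "severity": "Low", "summary": "Statement PDF will not download."}' \
  | python3 -c 'import json, sys; print(json.load(sys.stdin)["ticket_id"])')

# Poll /result every 2 seconds, mirroring the Streamlit loop,
# until a worker marks the job completed (or errored)
while true; do
  BODY=$(curl -s "http://localhost:8002/result/$TICKET_ID")
  echo "$BODY"
  case "$BODY" in *'"completed"'*|*'"error"'*) break ;; esac
  sleep 2
done
```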
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ fastapi
+ uvicorn
+ requests
+ pydantic
+ python-dotenv
+ google-generativeai
+ streamlit
+ # asyncio is part of the Python standard library; installing the PyPI package of the same name is unnecessary
+ pyngrok
+ sentence-transformers
+ torch
start.sh ADDED
@@ -0,0 +1,25 @@
+ #!/bin/bash
+
+ # Pass the GOOGLE_API_KEY HF Secret through to the services below
+ # (it is already in the environment; the re-export makes the dependency explicit)
+ export GOOGLE_API_KEY=$GOOGLE_API_KEY
+
+ # Start the 3 backend FastAPI services in the background
+ # (the trailing '&' runs each one as a background process)
+ echo "Starting Orchestrator on port 8000..."
+ uvicorn sync_async_routing_API:app --host 0.0.0.0 --port 8000 &
+
+ echo "Starting Sync Service on port 8001..."
+ # Module name matches the file sync_path_microservice.py
+ uvicorn sync_path_microservice:app --host 0.0.0.0 --port 8001 &
+
+ echo "Starting Async Service on port 8002..."
+ # Module name matches the file async_microservice.py
+ uvicorn async_microservice:app --host 0.0.0.0 --port 8002 &
+
+ # Start the Streamlit app in the foreground.
+ # This is the main process that keeps the container running;
+ # Hugging Face routes traffic to its port (8501).
+ echo "Starting Streamlit UI on port 8501..."
+ streamlit run app.py --server.port 8501 --server.headless true --server.address 0.0.0.0
+
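After start.sh has launched everything, one quick way to confirm the three FastAPI services are listening is to hit their auto-generated /docs pages (FastAPI serves these by default unless disabled, which none of the services here do). A minimal check:

```bash
# Expect HTTP 200 from each backend's interactive docs page
for port in 8000 8001 8002; do
  code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$port/docs")
  echo "port $port -> HTTP $code"
done
```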
sync_async_routing_API.py ADDED
@@ -0,0 +1,58 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ import requests
+
+ app = FastAPI(title="Ticket Routing Service (Orchestrator)")
+
+ # ---------- Ticket Model ----------
+ class Ticket(BaseModel):
+     channel: str
+     severity: str
+     summary: str
+
+ # ---------- Service URLs ----------
+ SYNC_SERVICE_URL = "http://localhost:8001/sync_ticket"    # Sync service API
+ ASYNC_SERVICE_URL = "http://localhost:8002/async_ticket"  # Async service API
+ ASYNC_RESULT_URL = "http://localhost:8002/result"         # Async service result API
+
+ # ---------- Routing API ----------
+ @app.post("/ticket")
+ def route_ticket(ticket: Ticket):
+     """
+     Receives a ticket from the frontend (Streamlit) and routes it
+     to the Sync or Async service based on severity.
+     """
+     try:
+         if ticket.severity.lower() == "high":
+             # Route to the Sync Service
+             response = requests.post(SYNC_SERVICE_URL, json=ticket.dict())
+         else:
+             # Route to the Async Service
+             response = requests.post(ASYNC_SERVICE_URL, json=ticket.dict())
+
+         response.raise_for_status()  # Raise an exception for 4xx/5xx errors
+         return response.json()
+
+     except requests.exceptions.RequestException as e:
+         print(f"Error connecting to microservice: {e}")
+         raise HTTPException(status_code=503, detail="Service unavailable")
+
+ # --- Result proxy endpoint (added so the frontend's polling no longer 404s) ---
+ @app.get("/result/{ticket_id}")
+ def get_async_result(ticket_id: str):
+     """
+     Forwards the result request from the frontend to the async microservice.
+     """
+     try:
+         # Construct the full URL to the async service's result endpoint
+         url = f"{ASYNC_RESULT_URL}/{ticket_id}"
+
+         response = requests.get(url)
+         response.raise_for_status()  # Check for errors from the async service
+
+         return response.json()
+
+     except requests.exceptions.RequestException as e:
+         print(f"Error connecting to async result service: {e}")
+         # The async service could not be reached
+         raise HTTPException(status_code=503, detail="Async service unavailable")
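Per the routing logic above, anything below High severity is handed to the async service, so the orchestrator answers with a job handle instead of a decision, and the frontend polls the orchestrator's /result proxy. A sketch of that flow (summary text illustrative, `<ticket_id>` to be substituted from the first response):

```bash
# A Low-severity ticket is queued, not answered inline;
# expected response shape: {"ticket_id": "<uuid>", "status": "queued"}
curl -s -X POST http://localhost:8000/ticket \
  -H "Content-Type: application/json" \
  -d '{"channel": "Phone", "severity": "Low", "summary": "Mobile app shows an outdated balance."}'

# Later, fetch the result through the orchestrator's proxy endpoint
curl -s http://localhost:8000/result/<ticket_id>
```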
sync_path_microservice.py ADDED
@@ -0,0 +1,136 @@
+ import google.generativeai as genai
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ import os
+ import json
+ import time
+ from sentence_transformers import SentenceTransformer, util
+ import torch
+
+ # Configure the Gemini client
+ try:
+     genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
+     print("Google Gemini client initialized successfully.")
+ except Exception as e:
+     print(f"Error initializing Google Gemini client: {e}")
+     genai = None
+
+ # RAG model + in-memory store
+ try:
+     embed_model = SentenceTransformer("all-MiniLM-L6-v2")
+     memory_store = []  # List of {"text": "...", "embedding": tensor, "response": "..."}
+     print("SentenceTransformer model loaded successfully.")
+ except Exception as e:
+     print(f"Error loading SentenceTransformer model: {e}")
+     embed_model = None
+
+ app = FastAPI(title="Sync Ticket Service (Gemini RAG)")
+
+ class Ticket(BaseModel):
+     channel: str
+     severity: str
+     summary: str
+
+ # RAG helper functions
+ def add_to_memory(ticket_text, response_text):
+     if not embed_model:
+         print("Embed model not loaded, skipping memory add.")
+         return
+     try:
+         embedding = embed_model.encode(ticket_text, convert_to_tensor=True)
+         memory_store.append({"text": ticket_text, "embedding": embedding, "response": response_text})
+         print(f"Added to memory. New memory size: {len(memory_store)}")
+     except Exception as e:
+         print(f"Error adding to memory: {e}")
+
+ def retrieve_context(query_text, top_k=2):
+     if not memory_store or not embed_model:
+         print("No memory or embed model, returning empty context.")
+         return ""
+     try:
+         query_emb = embed_model.encode(query_text, convert_to_tensor=True)
+         sims = [util.cos_sim(query_emb, item["embedding"]).item() for item in memory_store]
+
+         # No similarity threshold: always take the top_k matches, so the
+         # prototype always shows what it is retrieving
+         top_indices = sorted(range(len(sims)), key=lambda i: sims[i], reverse=True)[:top_k]
+
+         if not top_indices:
+             print("No context found (memory store was empty).")
+             return ""
+
+         print(f"Top similarity scores found: {[sims[i] for i in top_indices]}")
+         context = "\n\n".join([f"Past Ticket: {memory_store[i]['text']}\nResponse: {memory_store[i]['response']}" for i in top_indices])
+         print(f"Retrieved context: {context}")
+         return context
+     except Exception as e:
+         print(f"Error retrieving context: {e}")
+         return ""
+
+ def create_rag_prompt(ticket: Ticket):
+     """Creates the Gemini prompt and returns the prompt AND the context."""
+     context = retrieve_context(ticket.summary)
+     ticket_text = f"Channel: {ticket.channel}, Severity: {ticket.severity}, Summary: {ticket.summary}"
+
+     prompt = f"""
+ You are an expert banking support assistant.
+
+ Use the following past cases as context if relevant:
+ ---
+ {context if context else "No relevant past cases found."}
+ ---
+
+ Now classify this new ticket into:
+ 1. AI Code Patch
+ 2. Vibe Workflow
+ 3. General / Non-Issue — casual greetings, unclear, or unrelated messages.
+
+ Return a single, valid JSON object with 'decision', 'reason', and 'next_actions' (as a list of strings).
+
+ New Ticket:
+ {ticket_text}
+ """
+     return prompt, (context if context else "No relevant past cases found.")
+
+ @app.post("/sync_ticket")
+ def sync_ticket(ticket: Ticket):
+     if not genai or not embed_model:
+         raise HTTPException(status_code=503, detail="AI service not initialized")
+
+     print(f"Received sync ticket (GEMINI RAG MODE): {ticket.summary}")
+
+     try:
+         prompt, retrieved_context = create_rag_prompt(ticket)
+         print("--- Sending prompt to Gemini for sync ticket ---")
+
+         model = genai.GenerativeModel('gemini-2.5-flash')
+
+         start_time = time.perf_counter()
+
+         response = model.generate_content(
+             prompt,
+             generation_config=genai.types.GenerationConfig(
+                 response_mime_type="application/json",
+             )
+         )
+
+         processing_time = time.perf_counter() - start_time
+         print(f"Gemini sync processing time: {processing_time:.2f}s")
+
+         result_json = json.loads(response.text)
+         result_json["processing_time"] = processing_time
+         result_json["retrieved_context"] = retrieved_context
+
+         add_to_memory(ticket.summary, response.text)
+
+         return result_json
+
+     except Exception as e:
+         error_msg = str(e)
+         print(f"!!! Unexpected Error in sync_ticket (Gemini): {error_msg}")
+         raise HTTPException(status_code=500, detail=error_msg)
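Calling the sync service directly on port 8001 should return the model's classification JSON augmented with the `processing_time` and `retrieved_context` keys the handler adds. A sample invocation (summary text illustrative):

```bash
curl -s -X POST http://localhost:8001/sync_ticket \
  -H "Content-Type: application/json" \
  -d '{"channel": "Phone", "severity": "High", "summary": "Duplicate debit on my savings account."}'
```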