Spaces:

SyedShaheer
/

voice-task-backend

Running

App Files Files Community

SyedShaheer committed 12 days ago

Commit

bc10fd9

verified ·

1 Parent(s): 9e8ff8f

Upload 6 files

Browse files

Files changed (6) hide show

.dockerignore +5 -0
Dockerfile +22 -0
database.py +141 -0
main.py +494 -0
model_manager.py +159 -0
requirements.txt +12 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,5 @@

+.env
+__pycache__
+*.db
+.git
+.ipynb_checkpoints

Dockerfile ADDED Viewed

	@@ -0,0 +1,22 @@

+# Use a lightweight Python image
+FROM python:3.11-slim
+# Hugging Face Docker Spaces run the container as UID 1000. Create that user
+# up front: the app creates its SQLite file (tasks.db) in the working
+# directory at runtime and therefore needs write access there.
+RUN useradd --create-home --uid 1000 user
+# Set the working directory inside the container and hand it to the app user
+WORKDIR /app
+RUN chown user:user /app
+# Copy the requirements file first to leverage Docker layer caching
+COPY requirements.txt .
+# Install dependencies system-wide (still root at this point)
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the application code, owned by the runtime user
+COPY --chown=user:user . .
+# Drop root privileges for everything that runs from here on
+USER user
+# Hugging Face Spaces routes traffic to port 7860 by default
+ENV PORT=7860
+EXPOSE 7860
+# Start the FastAPI app using uvicorn
+# Bind to 0.0.0.0 so it is reachable from outside the container
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

database.py ADDED Viewed

	@@ -0,0 +1,141 @@

+import sqlite3
+from datetime import datetime, date, timedelta
+# ─── Init ──────────────────────────────────────────────────────────────────────
+def init_db():
+    """
+    Create the tasks table if it is missing and migrate older databases.
+    Opens 'tasks.db' relative to the current working directory. Databases
+    created before the date_context column existed get that column added in
+    place. Safe to call repeatedly.
+    """
+    conn = sqlite3.connect('tasks.db')
+    try:
+        cursor = conn.cursor()
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS tasks (
+                id           INTEGER PRIMARY KEY AUTOINCREMENT,
+                title        TEXT    NOT NULL,
+                time_context TEXT    NOT NULL,
+                date_context TEXT    NOT NULL DEFAULT 'today',
+                status       TEXT    DEFAULT 'pending'
+            )
+        ''')
+        # Inspect the schema instead of attempting the ALTER and swallowing
+        # OperationalError: that exception also covers locked or unreadable
+        # databases, which must not be silently ignored.
+        cursor.execute("PRAGMA table_info(tasks)")
+        existing_columns = {row[1] for row in cursor.fetchall()}  # row[1] is the column name
+        if "date_context" not in existing_columns:
+            cursor.execute("ALTER TABLE tasks ADD COLUMN date_context TEXT NOT NULL DEFAULT 'today'")
+            print("Migration: added date_context column.")
+        conn.commit()
+    finally:
+        # Release the file handle even when a statement fails
+        conn.close()
+# ─── Helpers ───────────────────────────────────────────────────────────────────
+def get_db_connection():
+    """Open 'tasks.db' and return a connection whose rows are sqlite3.Row objects."""
+    connection = sqlite3.connect('tasks.db')
+    # sqlite3.Row lets callers turn each row into a dict keyed by column name
+    connection.row_factory = sqlite3.Row
+    return connection
+def resolve_date(date_context: str) -> str:
+    """
+    Convert a spoken-style date reference into an ISO date (YYYY-MM-DD).
+    Recognised inputs (case-insensitive, surrounding whitespace ignored):
+    'today', 'tomorrow', 'yesterday', and dates parseable as YYYY-MM-DD.
+    Empty, blank, or None input resolves to today's date.
+    Parsed dates are re-emitted zero-padded ('2024-1-5' -> '2024-01-05') so
+    stored values compare and sort consistently.
+    Anything else (e.g. 'next Monday') is returned stripped but otherwise
+    unchanged so AI-generated phrases are preserved.
+    """
+    cleaned = date_context.strip() if date_context else ""
+    if not cleaned:
+        return date.today().isoformat()
+    day_offsets = {"yesterday": -1, "today": 0, "tomorrow": 1}
+    offset = day_offsets.get(cleaned.lower())
+    if offset is not None:
+        return (date.today() + timedelta(days=offset)).isoformat()
+    try:
+        # Round-trip through a date object to normalise the padding
+        return datetime.strptime(cleaned, "%Y-%m-%d").date().isoformat()
+    except ValueError:
+        # Unrecognised — keep the raw phrase
+        return cleaned
+# ─── CRUD ──────────────────────────────────────────────────────────────────────
+def get_all_tasks():
+    """
+    Return every pending task as a list of plain dicts.
+    Rows are ordered by date_context, then time_context. Both columns are
+    TEXT, so the ordering is lexical rather than chronological.
+    """
+    conn = get_db_connection()
+    try:
+        rows = conn.execute(
+            "SELECT * FROM tasks WHERE status = 'pending' ORDER BY date_context, time_context"
+        ).fetchall()
+    finally:
+        # Close the connection even if the query raises
+        conn.close()
+    return [dict(row) for row in rows]
+def get_tasks_by_date(date_context: str):
+    """
+    Return pending tasks whose stored date equals the resolved date_context.
+    date_context is passed through resolve_date first, so 'today',
+    'tomorrow', or an ISO date are all accepted. Results are ordered by the
+    time_context text column and returned as a list of dicts.
+    """
+    resolved = resolve_date(date_context)
+    conn = get_db_connection()
+    try:
+        rows = conn.execute(
+            "SELECT * FROM tasks WHERE status = 'pending' AND date_context = ? ORDER BY time_context",
+            (resolved,)
+        ).fetchall()
+    finally:
+        # Close the connection even if the query raises
+        conn.close()
+    return [dict(row) for row in rows]
+def create_task(title: str, time_context: str, date_context: str = "today"):
+    """
+    Insert a new task and return it as a dict.
+    date_context is resolved with resolve_date before being stored. The
+    returned dict carries the new row id plus the stored title, time_context
+    and resolved date_context, so the caller can track the last created task.
+    """
+    resolved_date = resolve_date(date_context)
+    conn = get_db_connection()
+    try:
+        cursor = conn.cursor()
+        cursor.execute(
+            "INSERT INTO tasks (title, time_context, date_context) VALUES (?, ?, ?)",
+            (title, time_context, resolved_date)
+        )
+        conn.commit()
+        new_id = cursor.lastrowid
+    finally:
+        # Close the connection even if the insert raises (e.g. NOT NULL violation)
+        conn.close()
+    return {"id": new_id, "title": title, "time_context": time_context, "date_context": resolved_date}
+def delete_task(task_id: int):
+    """Permanently remove the task with the given id; a non-existent id is a no-op."""
+    conn = get_db_connection()
+    try:
+        conn.execute("DELETE FROM tasks WHERE id = ?", (task_id,))
+        conn.commit()
+    finally:
+        # Close the connection even if the statement raises
+        conn.close()
+def update_task(task_id: int, new_time: str = None, new_date: str = None, new_title: str = None):
+    """
+    Update any combination of a task's time, date and title.
+    Only arguments with a truthy value are applied; None or '' leaves that
+    column unchanged. new_date is resolved with resolve_date before storage.
+    All requested changes are written in a single UPDATE statement.
+    """
+    assignments = []
+    values = []
+    if new_time:
+        assignments.append("time_context = ?")
+        values.append(new_time)
+    if new_date:
+        assignments.append("date_context = ?")
+        values.append(resolve_date(new_date))
+    if new_title:
+        assignments.append("title = ?")
+        values.append(new_title)
+    if not assignments:
+        return  # nothing to change — skip opening a connection
+    conn = get_db_connection()
+    try:
+        # The SET fragments are fixed literals from above; only values are bound
+        conn.execute(
+            f"UPDATE tasks SET {', '.join(assignments)} WHERE id = ?",
+            (*values, task_id)
+        )
+        conn.commit()
+    finally:
+        # Close the connection even if the statement raises
+        conn.close()
+def complete_task(task_id: int):
+    """Mark a task as done (status = 'done') without deleting its row."""
+    conn = get_db_connection()
+    try:
+        conn.execute("UPDATE tasks SET status = 'done' WHERE id = ?", (task_id,))
+        conn.commit()
+    finally:
+        # Close the connection even if the statement raises
+        conn.close()
+# Running this module directly creates or migrates tasks.db in the current
+# working directory and then reports success.
+if __name__ == "__main__":
+    init_db()
+    print("Database initialised successfully.")

main.py ADDED Viewed

	@@ -0,0 +1,494 @@

+import os
+import json
+import uuid
+from datetime import datetime
+from fastapi import FastAPI, Header
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from dotenv import load_dotenv
+import google.generativeai as genai
+from typing import Dict, Optional, List
+from database import get_all_tasks, create_task, delete_task, update_task, init_db
+from model_manager import model_manager
+load_dotenv()
+# Fails fast with KeyError at import time if GEMINI_API_KEY is not set
+genai.configure(api_key=os.environ["GEMINI_API_KEY"])
+# The Dockerfile starts only uvicorn and .dockerignore excludes *.db, so
+# nothing else creates the schema; without this call the first query fails
+# with "no such table: tasks". init_db() is safe to call on every start.
+init_db()
+app = FastAPI()
+# NOTE(review): wildcard origins together with allow_credentials=True lets any
+# site make credentialed requests. The API identifies sessions through the
+# X-Session-Id header rather than cookies, so consider an explicit origin list
+# or allow_credentials=False — confirm against the frontend before changing.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# ─── Session Store ─────────────────────────────────────────────────────────────
+# In-memory store: session id → per-session conversation state
+sessions: Dict[str, Dict] = {}
+def get_or_create_session(session_id: str) -> Dict:
+    """Return the state dict for session_id, registering a blank one on first use."""
+    try:
+        return sessions[session_id]
+    except KeyError:
+        blank_state = {
+            "history":         [],
+            "last_task_id":    None,
+            "last_task_title": None,
+            "last_read_tasks": [],
+            "pending_delete":  None,   # task_id awaiting confirmation
+        }
+        sessions[session_id] = blank_state
+        return blank_state
+# ─── Request / Response Models ─────────────────────────────────────────────────
+# Request body for POST /api/chat.
+class ChatRequest(BaseModel):
+    # The user's utterance; chat_endpoint appends it to the session history
+    # and embeds it in the prompt sent to the model.
+    text: str
+# Response body for POST /api/chat.
+class ChatResponse(BaseModel):
+    # Intent of the last processed action as reported by the model, or one of
+    # the handler-set values "DELETE", "CHAT", "CLARIFICATION", "ERROR".
+    intent:       str
+    # Text the client is expected to speak back to the user.
+    tts_response: str
+    # Session id in use: the X-Session-Id header value, or a freshly
+    # generated UUID when the header was absent.
+    session_id:   str
+    # Key of the model that produced the reply, or "confirmation-handler",
+    # "none", "unknown" when no model output was used.
+    model_used:   str
+# ─── Helpers ───────────────────────────────────────────────────────────────────
+def get_current_datetime_context() -> str:
+    """
+    Build the three-line date/time preamble that is embedded in the prompt.
+    Uses the server's local clock (naive datetime.now()).
+    """
+    moment = datetime.now()
+    preamble_lines = [
+        "Current date : " + moment.strftime('%A, %B %d, %Y'),
+        "Current time : " + moment.strftime('%I:%M %p'),
+        "Time periods : morning = before 12 PM | afternoon = 12–5 PM | evening = 5–9 PM | night = after 9 PM",
+    ]
+    return "\n".join(preamble_lines)
+def build_last_task_hint(session: Dict) -> str:
+    """
+    Render the prompt hints that let the model resolve vague references.
+    Emits a CRITICAL CONTEXT section when the session has a last referenced
+    task, and a LAST READ LIST section when tasks were recently read out.
+    Sections are separated by a blank line; returns '' when neither applies.
+    """
+    sections = []
+    last_id = session["last_task_id"]
+    if last_id is not None:
+        # Fall back to a synthetic label when no title was recorded
+        label = session.get("last_task_title") or f"ID {last_id}"
+        sections.append("\n".join([
+            "*** CRITICAL CONTEXT ***",
+            f"The LAST task the user explicitly referenced was: '{label}' (ID: {last_id}).",
+            "If the user says ANYTHING vague — 'the previous one', 'that one', 'it',",
+            f"'actually', 'change that', 'change it', 'move it' — you MUST use target_task_id: {last_id} in that action.",
+            "Do NOT pick a different task unless the user explicitly names one by title.",
+            "*** END CRITICAL CONTEXT ***",
+        ]))
+    recently_read = session.get("last_read_tasks", [])
+    if recently_read:
+        positions = [
+            f"  Position {rank}: '{entry['title']}' at {entry['time_context']} (ID: {entry['id']})"
+            for rank, entry in enumerate(recently_read, start=1)
+        ]
+        sections.append("\n".join([
+            "*** LAST READ LIST ***",
+            "The assistant just listed these tasks in this order:",
+            *positions,
+            "If the user says 'the first one', 'the second one', 'the last one', etc.,",
+            "resolve from this list and use that task's ID in the relevant action.",
+            "*** END LAST READ LIST ***",
+        ]))
+    return "\n\n".join(sections)
+# ─── Semantic category map ─────────────────────────────────────────────────────
+# Maps a spoken concept → keywords likely found in task titles.
+# build_semantic_hint treats a category as active when any of its keywords
+# occurs in the user's utterance, then collects tasks whose title contains any
+# keyword from that same category. Only the keyword lists are read there; the
+# category names themselves serve as labels.
+SEMANTIC_CATEGORIES = {
+    "workout":    ["workout", "gym", "exercise", "run", "running", "training", "fitness",
+                   "yoga", "pilates", "crossfit", "lift", "weights", "jog", "swim", "cycling", "bike"],
+    "meeting":    ["meeting", "meet", "sync", "call", "standup", "stand-up", "catch-up",
+                   "catchup", "1:1", "one on one", "interview", "review", "session"],
+    "linkedin":   ["linkedin", "post", "social", "content", "publish", "share"],
+    "email":      ["email", "mail", "inbox", "reply", "respond", "message"],
+    "lunch":      ["lunch", "eat", "food", "meal", "dinner", "breakfast", "coffee", "cafe"],
+    "doctor":     ["doctor", "dentist", "appointment", "checkup", "clinic", "hospital", "physio"],
+    "study":      ["study", "read", "reading", "course", "class", "lecture", "homework", "revision"],
+    "errand":     ["errand", "shop", "shopping", "grocery", "groceries", "bank", "pickup"],
+    "travel":     ["travel", "flight", "commute", "drive", "uber", "taxi", "train", "bus"],
+}
+def build_semantic_hint(user_text: str, tasks: list) -> str:
+    """
+    Build a SEMANTIC MATCH prompt hint for concept-style task references.
+    A category from SEMANTIC_CATEGORIES is active when one of its keywords
+    starts a word in the utterance; every task whose title likewise contains
+    a keyword of an active category becomes a candidate. If the utterance
+    names a time period (morning/afternoon/evening/night), candidates are
+    narrowed to tasks in that period, unless that would leave none.
+    Returns '' when there are no candidates, otherwise the hint text listing
+    each candidate's title, time, date and ID.
+    """
+    import re  # local import: this module does not import re at file level
+    def mentions(text: str, keywords: list) -> bool:
+        # Anchor at a word start so 'eat' no longer fires on 'create' or
+        # 'run' on 'prune', while 'meeting' still covers 'meetings'.
+        return any(re.search(r"\b" + re.escape(kw), text) for kw in keywords)
+    text_lower = user_text.lower()
+    matched_tasks = {}  # task_id → task
+    for keywords in SEMANTIC_CATEGORIES.values():
+        if not mentions(text_lower, keywords):
+            continue
+        for task in tasks:
+            if mentions(task["title"].lower(), keywords):
+                matched_tasks[task["id"]] = task
+    if not matched_tasks:
+        return ""
+    # Period boundaries in minutes since midnight: [start, end)
+    period_bounds = [
+        ("morning",   0,    720),            # before 12:00
+        ("afternoon", 720,  1020),
+        ("evening",   1020, 1260),
+        ("night",     1260, float("inf")),
+    ]
+    # First period named in the utterance wins, in the order listed above
+    bounds = next(((lo, hi) for period, lo, hi in period_bounds if period in text_lower), None)
+    if bounds is not None:
+        lo, hi = bounds
+        narrowed = {}
+        for tid, task in matched_tasks.items():
+            minutes = parse_minutes(task.get("time_context", ""))
+            # Compare against None explicitly: midnight parses to 0, which a
+            # truthiness fallback would mistake for "unparseable".
+            if minutes is not None and lo <= minutes < hi:
+                narrowed[tid] = task
+        if narrowed:
+            matched_tasks = narrowed
+    task_list = "\n".join(
+        f"  - '{t['title']}' at {t['time_context']} on {t.get('date_context','today')} (ID: {t['id']})"
+        for t in matched_tasks.values()
+    )
+    return (
+        f"\n\n*** SEMANTIC MATCH ***"
+        f"\nThe user said '{user_text}'. Based on semantic analysis, the most likely "
+        f"task(s) they are referring to:\n{task_list}"
+        f"\nUse the ID from this list as target_task_id. If only one match, use it directly."
+        f"\nIf multiple matches exist, pick the one that best fits the time period mentioned."
+        f"\n*** END SEMANTIC MATCH ***"
+    )
+def resolve_confirmation(text: str) -> Optional[bool]:
+    """
+    Classify a reply to a yes/no prompt.
+    Returns True when the reply confirms, False when it cancels, and None
+    when it contains neither kind of phrase. When both kinds appear, the one
+    occurring last decides, so 'actually wait no' cancels.
+    """
+    confirm_phrases = ("yes", "yeah", "yep", "sure", "ok", "okay", "confirm", "please", "do it", "go ahead", "delete it")
+    cancel_phrases = ("no", "nope", "cancel", "stop", "nevermind", "never mind", "dont", "wait", "keep it")
+    # Drop punctuation in one pass, then pad so whole phrases can be matched
+    # with a space on either side
+    without_punctuation = text.lower().translate(str.maketrans("", "", ".,!?;:'\""))
+    haystack = f" {without_punctuation} "
+    def last_position(phrases) -> int:
+        # Index of the right-most occurrence of any phrase, -1 when none occur
+        return max((haystack.rfind(f" {phrase} ") for phrase in phrases), default=-1)
+    confirm_at = last_position(confirm_phrases)
+    cancel_at = last_position(cancel_phrases)
+    if confirm_at == -1 and cancel_at == -1:
+        return None
+    return confirm_at > cancel_at
+def parse_minutes(time_str: str) -> Optional[int]:
+    """
+    Convert a clock string to minutes since midnight.
+    Accepts 'H:MM' or 'HH:MM' with an optional AM/PM suffix, and 'H AM' /
+    'H PM' without minutes (case-insensitive, surrounding whitespace ignored).
+    Examples: '11:05 AM' -> 665, '9 PM' -> 1260, '14:30' -> 870.
+    Returns None for empty input, a bare number such as '9', or an impossible
+    time (minutes above 59, hour above 23, or hour above 12 with AM/PM).
+    """
+    import re
+    if not time_str:
+        return None
+    found = re.match(r"(\d{1,2})(?::(\d{2}))?\s*(AM|PM)?", time_str.strip().upper())
+    if not found:
+        return None
+    hour_text, minute_text, period = found.groups()
+    if minute_text is None and period is None:
+        return None  # a bare number is not a time
+    hour = int(hour_text)
+    minute = int(minute_text) if minute_text is not None else 0
+    # Reject impossible clock values instead of returning out-of-range minutes
+    if minute > 59 or hour > 23 or (period is not None and hour > 12):
+        return None
+    if period == "PM" and hour != 12:
+        hour += 12
+    elif period == "AM" and hour == 12:
+        hour = 0  # 12 AM is midnight
+    return hour * 60 + minute
+def find_closest_task(requested_time: str, tasks: list, threshold_minutes: int = 60) -> Optional[dict]:
+    """
+    Pick the task scheduled nearest to requested_time.
+    Times are compared as minutes since midnight via parse_minutes; tasks
+    whose time_context cannot be parsed are ignored. The earliest-listed task
+    wins a tie. Returns None when requested_time is unparseable or no task
+    lies within threshold_minutes of it.
+    """
+    target = parse_minutes(requested_time)
+    if target is None:
+        return None
+    gaps = []  # (distance in minutes, task), in input order
+    for candidate in tasks:
+        candidate_minutes = parse_minutes(candidate.get("time_context", ""))
+        if candidate_minutes is not None:
+            gaps.append((abs(candidate_minutes - target), candidate))
+    within_reach = [pair for pair in gaps if pair[0] < threshold_minutes + 1]
+    if not within_reach:
+        return None
+    # min() keeps the first of several equally close tasks
+    _, nearest = min(within_reach, key=lambda pair: pair[0])
+    return nearest if nearest else None
+# ─── Endpoints ─────────────────────────────────────────────────────────────────
+# GET /api/tasks — returns the list produced by get_all_tasks() unchanged
+# (pending tasks only, per that function's query).
+@app.get("/api/tasks")
+async def get_tasks_endpoint():
+    return get_all_tasks()
+# GET /api/models — exposes model_manager.status() under the "models" key.
+@app.get("/api/models")
+async def list_models_endpoint():
+    return {"models": model_manager.status()}
+@app.post("/api/chat", response_model=ChatResponse)
+async def chat_endpoint(
+    request: ChatRequest,
+    x_session_id: Optional[str] = Header(default=None),
+):
+    """
+    Handle one user utterance and return the spoken reply.
+    Flow: (1) if a delete is awaiting confirmation, interpret the utterance
+    as yes/no and answer without calling the model; (2) otherwise build a
+    prompt from the current tasks, session history and pre-computed hints,
+    ask the model for a JSON list of actions, and apply them. DELETE actions
+    are never executed directly: they store pending_delete on the session and
+    return a confirmation question. Errors are reported as intent "ERROR".
+    The session is taken from the X-Session-Id header; a new UUID is
+    generated when the header is absent.
+    """
+    session_id = x_session_id or str(uuid.uuid4())
+    session    = get_or_create_session(session_id)
+    session["history"].append({"role": "user", "text": request.text})
+    print(f"[{session_id}] User: {request.text}")
+    # ── Pending delete confirmation check ──────────────────────────────────────
+    if session["pending_delete"] is not None:
+        confirmed = resolve_confirmation(request.text)
+        pending_id = session["pending_delete"]
+        if confirmed is True:
+            matched = next((t for t in get_all_tasks() if t["id"] == pending_id), None)
+            session["pending_delete"] = None
+            if matched:
+                delete_task(pending_id)
+                if session["last_task_id"] == pending_id:
+                    session["last_task_id"]    = None
+                    session["last_task_title"] = None
+                msg = f"Done, I've deleted '{matched['title']}' scheduled at {matched['time_context']}."
+            else:
+                msg = "That task no longer exists."
+            session["history"].append({"role": "agent", "text": msg})
+            return ChatResponse(intent="DELETE", tts_response=msg, session_id=session_id, model_used="confirmation-handler")
+        elif confirmed is False:
+            session["pending_delete"] = None
+            msg = "Got it, I'll keep the task. Anything else?"
+            session["history"].append({"role": "agent", "text": msg})
+            return ChatResponse(intent="CHAT", tts_response=msg, session_id=session_id, model_used="confirmation-handler")
+        else:
+            # User changed subject — clear pending and fall through to normal AI flow
+            session["pending_delete"] = None
+    # ── Build prompt ───────────────────────────────────────────────────────────
+    current_tasks     = get_all_tasks()
+    datetime_context  = get_current_datetime_context()
+    # NOTE(review): the whole session history is embedded on every call and is
+    # never trimmed, so prompt size grows with conversation length.
+    formatted_history = "\n".join(f"{m['role'].upper()}: {m['text']}" for m in session["history"])
+    hint_block        = build_last_task_hint(session)
+    # ── Pre-resolve 1: fuzzy time match ──────────────────────────────────────
+    # The clock time mentioned in the utterance is extracted once here and
+    # reused by the DELETE handling further down.
+    import re as _re
+    _time_pat = _re.search(
+        r"\b(\d{1,2}(?::\d{2})?\s*(?:AM|PM|am|pm))\b", request.text
+    )
+    _fuzzy_hint = ""
+    if _time_pat:
+        _req_time = _time_pat.group(1)
+        _req_mins = parse_minutes(_req_time)
+        _exact = any(
+            parse_minutes(t.get("time_context","")) == _req_mins
+            for t in current_tasks
+        )
+        if not _exact and _req_mins is not None:
+            _closest = find_closest_task(_req_time, current_tasks, threshold_minutes=90)
+            if _closest:
+                _fuzzy_hint = (
+                    f"\n*** FUZZY TIME MATCH ***"
+                    f"\nThe user asked about a task at {_req_time} but NO task exists at that exact time."
+                    f"\nThe CLOSEST task is: '{_closest['title']}' at {_closest['time_context']} (ID: {_closest['id']})."
+                    f"\nIf the user intent is DELETE or UPDATE, use ID {_closest['id']} as target_task_id."
+                    f"\nDo NOT say the task was not found. Instead use this closest match."
+                    f"\n*** END FUZZY TIME MATCH ***"
+                )
+    # ── Pre-resolve 2: semantic concept match ─────────────────────────────────
+    _semantic_hint = build_semantic_hint(request.text, current_tasks)
+    system_prompt = f"""
+You are an intelligent Voice Task Manager. You MUST handle multiple actions in a single response when the user asks for them.
+{datetime_context}
+{hint_block}{_fuzzy_hint}{_semantic_hint}
+Current tasks in the database:
+{json.dumps(current_tasks, indent=2)}
+Conversation history (oldest → newest):
+{formatted_history}
+Output a strict JSON object with NO markdown. Each action in the "actions" array is independent.
+Schema:
+{{
+  "actions": [
+    {{
+      "intent": "CREATE" | "UPDATE" | "DELETE" | "READ" | "CHAT",
+      "target_task_id": <integer task ID for UPDATE/DELETE, or null>,
+      "entities": {{
+        "title":        "Task title — required for CREATE, optional for UPDATE (if renaming)",
+        "time_context": "e.g. '7:00 AM' — required for CREATE, optional for UPDATE",
+        "date_context": "e.g. 'today', 'tomorrow', 'YYYY-MM-DD' — required for CREATE, optional for UPDATE",
+        "time_filter":  "morning|afternoon|evening|night|today|tomorrow|all — READ only"
+      }},
+      "read_task_ids": [ordered list of task IDs mentioned — READ only, else omit]
+    }}
+  ],
+  "tts_response": "A single natural spoken reply covering ALL actions together."
+}}
+Rules — READ CAREFULLY:
+1. MULTI-ACTION: If the user requests N things (e.g. 3 tasks, or create + delete), produce N action objects.
+   Example: "Gym at 7, sync at 9, LinkedIn at 11 tomorrow" → 3 CREATE actions.
+   Example: "Delete LinkedIn and add a call at 4 PM" → 1 DELETE + 1 CREATE action.
+2. CREATE: Every CREATE action needs its own title, time_context, date_context (default 'today').
+3. UPDATE: target_task_id goes INSIDE the action object. Only fill changed entity fields.
+4. DELETE: target_task_id goes INSIDE the action object. Set entities to {{}}.
+   Only use IDs that exist in the database list. Never invent IDs.
+5. READ: Use time_filter to select which tasks to mention. Speak naturally, not as a list.
+   Fill read_task_ids in the order you mention them.
+6. tts_response is ONE combined reply for everything, e.g.:
+   "Done! I've added Gym at 7 AM, Team sync at 9 AM, and LinkedIn post at 11 AM — all for tomorrow morning."
+7. Vague references ('the previous one', 'it', 'that', 'the second one'):
+   Resolve using the CRITICAL CONTEXT and LAST READ LIST hints above.
+   Never invent task IDs.
+8. Semantic references ('my workout', 'the meeting', 'evening run', 'the LinkedIn thing'):
+   Resolve using the SEMANTIC MATCH hint above when present.
+   Match by concept, not exact wording — 'gym session' matches a task called 'Morning Workout'.
+   If a time period is mentioned ('evening workout'), use it to narrow among multiple matches.
+   Always prefer the SEMANTIC MATCH hint ID over guessing from the task title alone.
+Time-filter reference:
+- morning   → before 12 PM
+- afternoon → 12 PM – 5 PM
+- evening   → 5 PM – 9 PM
+- night     → after 9 PM
+- today / tomorrow → by date
+- all       → no filter
+"""
+    try:
+        # NOTE(review): call_with_fallback is a synchronous call made inside an
+        # async handler, so the event loop waits while the model responds.
+        response_text, model_used = model_manager.call_with_fallback(system_prompt)
+        ai_decision  = json.loads(response_text)
+        actions      = ai_decision.get("actions", [])
+        tts_response = ai_decision.get("tts_response", "Done.")
+        print(f"[{session_id}] Decision ({model_used}) — {len(actions)} action(s):", ai_decision)
+        last_intent = "CHAT"
+        for action in actions:
+            intent   = action.get("intent", "CHAT")
+            tid      = action.get("target_task_id")
+            # The model may send "entities": null; treat that like an empty object
+            entities = action.get("entities") or {}
+            last_intent = intent
+            if intent == "CREATE":
+                task_title = entities.get("title", "Untitled")
+                new_task   = create_task(
+                    task_title,
+                    entities.get("time_context", ""),
+                    entities.get("date_context", "today"),
+                )
+                if isinstance(new_task, dict) and "id" in new_task:
+                    session["last_task_id"]    = new_task["id"]
+                    session["last_task_title"] = task_title
+            elif intent == "UPDATE":
+                # Only act on IDs present in the task list given to the model,
+                # so an invented ID cannot become the session's "last task".
+                matched = next((t for t in current_tasks if t.get("id") == tid), None) if tid else None
+                if matched:
+                    update_task(
+                        tid,
+                        new_time=entities.get("time_context"),
+                        new_date=entities.get("date_context"),
+                        new_title=entities.get("title"),
+                    )
+                    session["last_task_id"] = tid
+                    # current_tasks predates the update, so prefer the new title on a rename
+                    session["last_task_title"] = entities.get("title") or matched["title"]
+            elif intent == "DELETE":
+                req_time_str = _time_pat.group(1) if _time_pat else ""
+                # ── Step 1: exact match by ID Gemini provided ──────────────────
+                matched = next((t for t in current_tasks if t.get("id") == tid), None) if tid else None
+                # ── Step 2: fallback — fuzzy match from raw utterance ──────────
+                if not matched and req_time_str:
+                    matched = find_closest_task(req_time_str, current_tasks, threshold_minutes=90)
+                if matched:
+                    # ── Step 3: always confirm before deleting ─────────────────
+                    exact_match = parse_minutes(req_time_str) == parse_minutes(matched["time_context"]) if req_time_str else True
+                    if exact_match:
+                        confirm_msg = (
+                            f"Just to confirm — delete '{matched['title']}' "
+                            f"at {matched['time_context']}? Say yes to confirm or no to cancel."
+                        )
+                    else:
+                        confirm_msg = (
+                            f"I couldn't find a task at {req_time_str}. "
+                            f"Did you mean '{matched['title']}' at {matched['time_context']}? "
+                            f"Say yes to delete it or no to cancel."
+                        )
+                    session["pending_delete"] = matched["id"]
+                    session["history"].append({"role": "agent", "text": confirm_msg})
+                    # NOTE(review): returning here skips any actions that follow
+                    # this DELETE in the same model response.
+                    return ChatResponse(
+                        intent="CLARIFICATION",
+                        tts_response=confirm_msg,
+                        session_id=session_id,
+                        model_used=model_used,
+                    )
+                # else: nothing found at all — fall through, AI tts_response handles it
+            elif intent == "READ":
+                read_ids   = action.get("read_task_ids", [])
+                id_to_task = {t["id"]: t for t in current_tasks}
+                if read_ids:
+                    session["last_read_tasks"] = [
+                        id_to_task[rid] for rid in read_ids if rid in id_to_task
+                    ]
+                    if session["last_read_tasks"]:
+                        last = session["last_read_tasks"][-1]
+                        session["last_task_id"]    = last["id"]
+                        session["last_task_title"] = last["title"]
+        session["history"].append({"role": "agent", "text": tts_response})
+        return ChatResponse(
+            intent=last_intent,
+            tts_response=tts_response,
+            session_id=session_id,
+            model_used=model_used,
+        )
+    except RuntimeError as e:
+        # Raised by model_manager.call_with_fallback when no model produced a reply
+        msg = "All AI models are currently rate-limited. Please wait a moment and try again."
+        print(f"[{session_id}] {e}")
+        session["history"].append({"role": "agent", "text": msg})
+        return ChatResponse(intent="ERROR", tts_response=msg, session_id=session_id, model_used="none")
+    except Exception as e:
+        # Top-level boundary: malformed model JSON, database errors, etc.
+        msg = "Sorry, I had trouble processing that request."
+        print(f"[{session_id}] Error: {e}")
+        session["history"].append({"role": "agent", "text": msg})
+        return ChatResponse(intent="ERROR", tts_response=msg, session_id=session_id, model_used="unknown")

model_manager.py ADDED Viewed

	@@ -0,0 +1,159 @@

+import time
+import threading
+from collections import deque
+from typing import Optional
+import google.generativeai as genai
+# ─── Model Pool (only models with actual quota) ───────────────────────────────
+# Ordered by preference: most quota first
+# Each entry: "key" is the model id passed to genai.GenerativeModel, "name" is
+# a display label, "rpm"/"rpd" are the per-minute / per-day request caps that
+# ModelManager enforces locally. "tpm" is recorded here but is not read by any
+# code in this module.
+MODEL_POOL = [
+    {
+        "key":     "gemini-3.1-flash-lite",
+        "name":    "Gemini 3.1 Flash Lite",
+        "rpm":     15,
+        "rpd":     500,
+        "tpm":     250_000,
+    },
+    {
+        "key":     "gemini-2.5-flash-lite",  # gemini-2.5-flash-lite-preview-06-17 if needed
+        "name":    "Gemini 2.5 Flash Lite",
+        "rpm":     10,
+        "rpd":     20,
+        "tpm":     250_000,
+    },
+    {
+        "key":     "gemini-2.5-flash",
+        "name":    "Gemini 2.5 Flash",
+        "rpm":     5,
+        "rpd":     20,
+        "tpm":     250_000,
+    },
+    {
+        # NOTE(review): the key says 2.0 while the display name says 3 —
+        # confirm which model this entry is meant to target.
+        "key":     "gemini-2.0-flash",       # "Gemini 3 Flash" in the UI
+        "name":    "Gemini 3 Flash",
+        "rpm":     5,
+        "rpd":     20,
+        "tpm":     250_000,
+    },
+]
+class ModelManager:
+    """
+    Tracks per-model request quotas (RPM + RPD) and falls back to the next
+    model in MODEL_POOL when one is exhausted or fails.
+    Usage is counted with rolling windows: timestamps of successful calls are
+    kept per model and pruned once older than 60 s (minute window) or
+    86 400 s (day window). A model that answered with a quota error is
+    additionally put into a timed cooldown.
+    """
+    def __init__(self):
+        # Guards the three dicts below
+        self._lock = threading.Lock()
+        # For each model key: deque of time.time() stamps of recent calls
+        self._minute_calls: dict[str, deque] = {m["key"]: deque() for m in MODEL_POOL}
+        self._day_calls:    dict[str, deque] = {m["key"]: deque() for m in MODEL_POOL}
+        # Per model key: time.time() value until which the model is paused
+        # after the API itself reported a rate limit
+        self._cooldown_until: dict[str, float] = {m["key"]: 0.0 for m in MODEL_POOL}
+    def _prune(self, dq: deque, window_seconds: int) -> None:
+        """Remove timestamps outside the rolling window."""
+        cutoff = time.time() - window_seconds
+        while dq and dq[0] < cutoff:
+            dq.popleft()
+    def _is_available(self, model: dict) -> bool:
+        """Return True when the model is not cooling down and is below both quotas."""
+        key = model["key"]
+        now = time.time()
+        # Hard cooldown (e.g. after a 429)
+        if now < self._cooldown_until[key]:
+            return False
+        self._prune(self._minute_calls[key], 60)
+        self._prune(self._day_calls[key],    86_400)
+        rpm_ok = len(self._minute_calls[key]) < model["rpm"]
+        rpd_ok = len(self._day_calls[key])    < model["rpd"]
+        return rpm_ok and rpd_ok
+    def _record_call(self, key: str) -> None:
+        """Count one call against both the minute and the day window."""
+        now = time.time()
+        self._minute_calls[key].append(now)
+        self._day_calls[key].append(now)
+    def _set_cooldown(self, key: str, seconds: int = 65) -> None:
+        """Call this after receiving a 429 to pause that model."""
+        self._cooldown_until[key] = time.time() + seconds
+        print(f"[ModelManager] {key} in cooldown for {seconds}s")
+    def get_available_model(self) -> Optional[dict]:
+        """Return the first model that has remaining quota, or None."""
+        with self._lock:
+            for model in MODEL_POOL:
+                if self._is_available(model):
+                    return model
+        return None
+    def call_with_fallback(self, system_prompt: str) -> tuple[str, str]:
+        """
+        Try each currently available model in MODEL_POOL order.
+        On success returns (response_text, model_key) and records the call.
+        On a quota error the model is put into a 65 s cooldown and the next
+        one is tried; any other error skips to the next model as well.
+        Raises RuntimeError when no model is available or every attempt
+        failed; in the latter case the last underlying error is included in
+        the message and chained as the cause.
+        """
+        import google.api_core.exceptions as gex
+        with self._lock:
+            candidates = [m for m in MODEL_POOL if self._is_available(m)]
+        if not candidates:
+            raise RuntimeError("All models are rate-limited. Try again later.")
+        last_error = None
+        for model_info in candidates:
+            key = model_info["key"]
+            try:
+                genai_model = genai.GenerativeModel(
+                    key,
+                    generation_config={"response_mime_type": "application/json"},
+                )
+                response = genai_model.generate_content(system_prompt)
+                with self._lock:
+                    self._record_call(key)
+                print(f"[ModelManager] Used: {key}")
+                return response.text, key
+            except gex.ResourceExhausted as e:
+                print(f"[ModelManager] 429 on {key}: {e}")
+                with self._lock:
+                    self._set_cooldown(key, seconds=65)
+                last_error = e
+                continue  # try next model
+            except Exception as e:
+                print(f"[ModelManager] Error on {key}: {e}")
+                last_error = e
+                continue  # skip broken model, try next
+        # Keep the underlying cause so callers' logs can tell quota exhaustion
+        # apart from other failures (bad key, unknown model, blocked response)
+        raise RuntimeError(
+            f"All models failed or are rate-limited. Last error: {last_error}"
+        ) from last_error
+    def status(self) -> list[dict]:
+        """Return current usage snapshot for all models (useful for /api/models endpoint)."""
+        now = time.time()
+        result = []
+        with self._lock:
+            for m in MODEL_POOL:
+                key = m["key"]
+                self._prune(self._minute_calls[key], 60)
+                self._prune(self._day_calls[key], 86_400)
+                cooldown_remaining = max(0, self._cooldown_until[key] - now)
+                result.append({
+                    "key":               key,
+                    "name":              m["name"],
+                    "rpm_limit":         m["rpm"],
+                    "rpd_limit":         m["rpd"],
+                    "rpm_used":          len(self._minute_calls[key]),
+                    "rpd_used":          len(self._day_calls[key]),
+                    "available":         self._is_available(m),
+                    "cooldown_seconds":  round(cooldown_remaining),
+                })
+        return result
+# Singleton — import this in main.py
+# Created once at import time, so all importers share the same quota counters.
+model_manager = ModelManager()

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+fastapi
+uvicorn
+google-generativeai
+python-dotenv
+google-genai
+transformers
+torch
+torchvision
+torchaudio