MohitGupta41 committed on
Commit
d4b40f7
·
1 Parent(s): 19111af

Add application file

Browse files
Files changed (5) hide show
  1. .env +7 -2
  2. Dockerfile +9 -19
  3. app.py +376 -125
  4. requirements.txt +2 -1
  5. start.sh +0 -42
.env CHANGED
@@ -1,2 +1,7 @@
1
- # MODEL_NAME=llama3:8b
2
- MODEL_NAME=mistral:instruct
 
 
 
 
 
 
1
+ # --- Optional fallbacks (only if you DON'T send keys from the client) ---
2
+ GEMINI_API_KEY=your_gemini_key_here
3
+ HF_API_KEY=hf_your_hf_key_here
4
+
5
+ # --- Optional default models (used if the request doesn't specify `model`) ---
6
+ DEFAULT_GEMINI_MODEL=gemini-1.5-flash
7
+ DEFAULT_HF_MODEL=google/gemma-3-27b-it
Dockerfile CHANGED
@@ -1,26 +1,16 @@
1
  FROM python:3.11-slim
2
 
3
- RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/*
 
4
 
5
- # Install Ollama
6
- RUN curl -fsSL https://ollama.com/install.sh | sh
7
 
8
- # Non-root user
9
- RUN useradd -m -u 1000 appuser
10
- USER appuser
11
-
12
- # ✅ Use absolute paths here (do NOT use $HOME interpolation)
13
- ENV HOME=/home/appuser
14
- ENV PATH="/home/appuser/.local/bin:/usr/local/bin:/usr/local/sbin:/usr/sbin:/usr/bin:/sbin:/bin"
15
- ENV OLLAMA_MODELS="/home/appuser/.ollama"
16
- ENV PYTHONDONTWRITEBYTECODE=1
17
- ENV PYTHONUNBUFFERED=1
18
-
19
- WORKDIR /home/appuser/app
20
- COPY --chown=appuser requirements.txt .
21
  RUN pip install --no-cache-dir -r requirements.txt
22
- COPY --chown=appuser . .
 
 
23
 
24
  EXPOSE 7860
25
- RUN chmod +x start.sh
26
- CMD ["./start.sh"]
 
1
  FROM python:3.11-slim
2
 
3
+ # System deps (certs only)
4
+ RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates && rm -rf /var/lib/apt/lists/*
5
 
6
+ WORKDIR /app
 
7
 
8
+ # Python deps
9
+ COPY requirements.txt .
 
 
 
 
 
 
 
 
 
 
 
10
  RUN pip install --no-cache-dir -r requirements.txt
11
+
12
+ # App code
13
+ COPY . .
14
 
15
  EXPOSE 7860
16
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
app.py CHANGED
@@ -1,22 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
- import time
3
  import logging
4
- from typing import Optional
5
 
6
- from fastapi import FastAPI, HTTPException
7
  from fastapi.responses import JSONResponse
8
  from fastapi.middleware.cors import CORSMiddleware
9
  from pydantic import BaseModel, Field, ConfigDict
10
- import ollama
11
-
12
- # --- Config ---
13
- # MODEL_NAME = os.getenv("MODEL_NAME", "llama3.2:3b-instruct-q4_K_M") # small & CPU-friendly
14
- # MODEL_NAME = os.getenv("MODEL_NAME", "mistral:instruct") # small & CPU-friendly
15
- MODEL_NAME = os.getenv("MODEL_NAME", "smallthinker:latest") # small & CPU-friendly
16
- PROFILE_MD_PATH = os.path.join("Data", "profile_data.md")
17
 
 
18
  logging.basicConfig(level=logging.INFO)
19
- logger = logging.getLogger(__name__)
 
 
20
 
21
  def load_profile_md() -> str:
22
  if os.path.exists(PROFILE_MD_PATH):
@@ -24,143 +190,228 @@ def load_profile_md() -> str:
24
  return f.read()
25
  return ""
26
 
27
- def load_profile_text():
28
- with open("Data/profile_data.txt", "r", encoding="utf-8") as f:
29
- return f.read()
30
-
31
  PROFILE_MD = load_profile_md()
32
- # PROFILE_MD = load_profile_text()
33
- # print(PROFILE_MD)
34
-
35
- SYSTEM_PROMPT = f"""You are Mohit Gupta's AI voice twin, built to assist in interviews and Q&A sessions.
36
- Your job is to answer truthfully, factually, and in a friendly but professional tone using the context provided.
37
 
38
- The context is formatted in Markdown with sections (e.g., # About Me, ## Projects, ### Features).
39
- Use these sections to give structured and relevant answers.
40
- Do not invent details not present in the context. If asked about something outside this context, politely clarify.
 
 
41
 
42
  Guidelines:
43
- - Answer concisely but include specific details when relevant (projects, metrics, tech stack).
44
- - If multiple related sections exist, combine their info naturally.
45
- - Do not repeat the entire context; summarize what is relevant to the question.
46
- - Maintain first-person voice (“I have worked on…”) as you are representing Mohit Gupta.
47
 
48
- Context about Mohit (Markdown format):
49
  {PROFILE_MD}
50
- """
51
 
52
- app = FastAPI(title="Voice Agent API", version="0.1.0")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  app.add_middleware(
54
  CORSMiddleware,
55
- allow_origins=["*"], # tighten for prod
56
  allow_credentials=True,
57
  allow_methods=["*"],
58
  allow_headers=["*"],
59
  )
60
 
61
  class ChatIn(BaseModel):
62
- question: str = Field(..., examples=["Give me a one-line intro about me."])
63
- session_id: str | None = Field(None, examples=["abc123"])
 
 
 
 
 
 
 
 
 
 
 
64
  model_config = ConfigDict(json_schema_extra={
65
- "examples": [
66
- {"question": "Summarize your projects briefly.", "session_id": "demo-1"}
67
- ]
 
 
 
68
  })
69
 
70
  class ChatOut(BaseModel):
71
  answer: str
72
 
73
- def _ollama_ok(timeout=15):
74
- """Wait until ollama serve is ready."""
75
- t0 = time.time()
76
- while time.time() - t0 < timeout:
77
- try:
78
- _ = ollama.list() # hits http://127.0.0.1:11434 by default
79
- return True
80
- except Exception:
81
- time.sleep(0.5)
82
- return False
83
-
84
- @app.on_event("startup")
85
- async def on_start():
86
- logger.info(f"Starting API with model: {MODEL_NAME}")
87
- if not _ollama_ok():
88
- logger.warning("Ollama not ready after wait; requests may fail.")
89
-
90
  @app.get("/")
91
  def root():
92
- return JSONResponse({"ok": True, "message": "Voice Agent API"})
93
 
94
  @app.get("/api/health")
95
  def health():
96
- try:
97
- models = [m["name"] for m in ollama.list().get("models", [])]
98
- print(ollama.list())
99
- return {"ok": True, "model": MODEL_NAME, "available_models": models}
100
- except Exception as e:
101
- return {"ok": False, "error": str(e)}
102
-
103
- @app.post("/api/chat", response_model=ChatOut,
104
- tags=["Chat"], summary="Ask the agent",
105
- description="Send a question; returns a concise first-person answer.")
106
- def chat(payload: ChatIn):
107
- try:
108
- # res = ollama.chat(
109
- # model=MODEL_NAME,
110
- # messages=[
111
- # {"role": "system", "content": SYSTEM_PROMPT},
112
- # {"role": "user", "content": payload.question},
113
- # ],
114
- # )
115
- def build_prompt(question: str) -> str:
116
- # return f"""
117
- # You are Mohit Gupta's AI voice twin...
118
-
119
- # ### Guidelines
120
- # - Answer concisely...
121
- # - First-person voice...
122
-
123
- # ### Context (use this only; do not invent):
124
- return f"""
125
- You are Mohit Gupta's AI voice twin, built to assist in interviews and Q&A sessions.
126
- Your job is to answer truthfully, factually, and in a friendly but professional tone using the context provided.
127
-
128
- The context is formatted in Markdown with sections (e.g., # About Me, ## Projects, ### Features).
129
- Use these sections to give structured and relevant answers.
130
- Do not invent details not present in the context. If asked about something outside this context, politely clarify.
131
-
132
- Guidelines:
133
- - Answer concisely but include specific details when relevant (projects, metrics, tech stack).
134
- - If multiple related sections exist, combine their info naturally.
135
- - Do not repeat the entire context; summarize what is relevant to the question.
136
- - Maintain first-person voice (“I have worked on…”) as you are representing Mohit Gupta.
137
-
138
- Context about Mohit (Markdown format):
139
- {PROFILE_MD}
140
-
141
- ### Task
142
- Answer the user question using ONLY the context above.
143
-
144
- ### Question
145
- {question}
146
- """
147
-
148
- res = ollama.chat(
149
- model=MODEL_NAME,
150
- messages=[{"role": "user", "content": build_prompt(payload.question)}],
151
- options={"num_ctx": 7000} # give yourself room
152
- )
153
-
154
- print(SYSTEM_PROMPT)
155
- print('*'*50)
156
- print(res)
157
- print('*'*50)
158
- print(payload)
159
- text = res.get("message", {}).get("content", "").strip()
160
- return ChatOut(answer=text or "Sorry, I didn’t catch that.")
161
- except Exception as e:
162
- # Show a useful error if the model is missing
163
- if "model" in str(e).lower() and "not found" in str(e).lower():
164
- raise HTTPException(500, f"Model '{MODEL_NAME}' not found in Ollama. "
165
- f"Make sure it’s pulled at start. Error: {e}")
166
- raise
 
1
+ # import os
2
+ # import time
3
+ # import logging
4
+ # from typing import Optional
5
+
6
+ # from fastapi import FastAPI, HTTPException
7
+ # from fastapi.responses import JSONResponse
8
+ # from fastapi.middleware.cors import CORSMiddleware
9
+ # from pydantic import BaseModel, Field, ConfigDict
10
+ # import ollama
11
+
12
+ # # --- Config ---
13
+ # # MODEL_NAME = os.getenv("MODEL_NAME", "llama3.2:3b-instruct-q4_K_M") # small & CPU-friendly
14
+ # # MODEL_NAME = os.getenv("MODEL_NAME", "mistral:instruct") # small & CPU-friendly
15
+ # MODEL_NAME = os.getenv("MODEL_NAME", "smallthinker:latest") # small & CPU-friendly
16
+ # PROFILE_MD_PATH = os.path.join("Data", "profile_data.md")
17
+
18
+ # logging.basicConfig(level=logging.INFO)
19
+ # logger = logging.getLogger(__name__)
20
+
21
+ # def load_profile_md() -> str:
22
+ # if os.path.exists(PROFILE_MD_PATH):
23
+ # with open(PROFILE_MD_PATH, "r", encoding="utf-8") as f:
24
+ # return f.read()
25
+ # return ""
26
+
27
+ # def load_profile_text():
28
+ # with open("Data/profile_data.txt", "r", encoding="utf-8") as f:
29
+ # return f.read()
30
+
31
+ # PROFILE_MD = load_profile_md()
32
+ # # PROFILE_MD = load_profile_text()
33
+ # # print(PROFILE_MD)
34
+
35
+ # SYSTEM_PROMPT = f"""You are Mohit Gupta's AI voice twin, built to assist in interviews and Q&A sessions.
36
+ # Your job is to answer truthfully, factually, and in a friendly but professional tone using the context provided.
37
+
38
+ # The context is formatted in Markdown with sections (e.g., # About Me, ## Projects, ### Features).
39
+ # Use these sections to give structured and relevant answers.
40
+ # Do not invent details not present in the context. If asked about something outside this context, politely clarify.
41
+
42
+ # Guidelines:
43
+ # - Answer concisely but include specific details when relevant (projects, metrics, tech stack).
44
+ # - If multiple related sections exist, combine their info naturally.
45
+ # - Do not repeat the entire context; summarize what is relevant to the question.
46
+ # - Maintain first-person voice (“I have worked on…”) as you are representing Mohit Gupta.
47
+
48
+ # Context about Mohit (Markdown format):
49
+ # {PROFILE_MD}
50
+ # """
51
+
52
+ # app = FastAPI(title="Voice Agent API", version="0.1.0")
53
+ # app.add_middleware(
54
+ # CORSMiddleware,
55
+ # allow_origins=["*"], # tighten for prod
56
+ # allow_credentials=True,
57
+ # allow_methods=["*"],
58
+ # allow_headers=["*"],
59
+ # )
60
+
61
+ # class ChatIn(BaseModel):
62
+ # question: str = Field(..., examples=["Give me a one-line intro about me."])
63
+ # session_id: str | None = Field(None, examples=["abc123"])
64
+ # model_config = ConfigDict(json_schema_extra={
65
+ # "examples": [
66
+ # {"question": "Summarize your projects briefly.", "session_id": "demo-1"}
67
+ # ]
68
+ # })
69
+
70
+ # class ChatOut(BaseModel):
71
+ # answer: str
72
+
73
+ # def _ollama_ok(timeout=15):
74
+ # """Wait until ollama serve is ready."""
75
+ # t0 = time.time()
76
+ # while time.time() - t0 < timeout:
77
+ # try:
78
+ # _ = ollama.list() # hits http://127.0.0.1:11434 by default
79
+ # return True
80
+ # except Exception:
81
+ # time.sleep(0.5)
82
+ # return False
83
+
84
+ # @app.on_event("startup")
85
+ # async def on_start():
86
+ # logger.info(f"Starting API with model: {MODEL_NAME}")
87
+ # if not _ollama_ok():
88
+ # logger.warning("Ollama not ready after wait; requests may fail.")
89
+
90
+ # @app.get("/")
91
+ # def root():
92
+ # return JSONResponse({"ok": True, "message": "Voice Agent API"})
93
+
94
+ # @app.get("/api/health")
95
+ # def health():
96
+ # try:
97
+ # models = [m["name"] for m in ollama.list().get("models", [])]
98
+ # print(ollama.list())
99
+ # return {"ok": True, "model": MODEL_NAME, "available_models": models}
100
+ # except Exception as e:
101
+ # return {"ok": False, "error": str(e)}
102
+
103
+ # @app.post("/api/chat", response_model=ChatOut,
104
+ # tags=["Chat"], summary="Ask the agent",
105
+ # description="Send a question; returns a concise first-person answer.")
106
+ # def chat(payload: ChatIn):
107
+ # try:
108
+ # # res = ollama.chat(
109
+ # # model=MODEL_NAME,
110
+ # # messages=[
111
+ # # {"role": "system", "content": SYSTEM_PROMPT},
112
+ # # {"role": "user", "content": payload.question},
113
+ # # ],
114
+ # # )
115
+ # def build_prompt(question: str) -> str:
116
+ # # return f"""
117
+ # # You are Mohit Gupta's AI voice twin...
118
+
119
+ # # ### Guidelines
120
+ # # - Answer concisely...
121
+ # # - First-person voice...
122
+
123
+ # # ### Context (use this only; do not invent):
124
+ # return f"""
125
+ # You are Mohit Gupta's AI voice twin, built to assist in interviews and Q&A sessions.
126
+ # Your job is to answer truthfully, factually, and in a friendly but professional tone using the context provided.
127
+
128
+ # The context is formatted in Markdown with sections (e.g., # About Me, ## Projects, ### Features).
129
+ # Use these sections to give structured and relevant answers.
130
+ # Do not invent details not present in the context. If asked about something outside this context, politely clarify.
131
+
132
+ # Guidelines:
133
+ # - Answer concisely but include specific details when relevant (projects, metrics, tech stack).
134
+ # - If multiple related sections exist, combine their info naturally.
135
+ # - Do not repeat the entire context; summarize what is relevant to the question.
136
+ # - Maintain first-person voice (“I have worked on…”) as you are representing Mohit Gupta.
137
+
138
+ # Context about Mohit (Markdown format):
139
+ # {PROFILE_MD}
140
+
141
+ # ### Task
142
+ # Answer the user question using ONLY the context above.
143
+
144
+ # ### Question
145
+ # {question}
146
+ # """
147
+
148
+ # res = ollama.chat(
149
+ # model=MODEL_NAME,
150
+ # messages=[{"role": "user", "content": build_prompt(payload.question)}],
151
+ # options={"num_ctx": 7000} # give yourself room
152
+ # )
153
+
154
+ # print(SYSTEM_PROMPT)
155
+ # print('*'*50)
156
+ # print(res)
157
+ # print('*'*50)
158
+ # print(payload)
159
+ # text = res.get("message", {}).get("content", "").strip()
160
+ # return ChatOut(answer=text or "Sorry, I didn’t catch that.")
161
+ # except Exception as e:
162
+ # # Show a useful error if the model is missing
163
+ # if "model" in str(e).lower() and "not found" in str(e).lower():
164
+ # raise HTTPException(500, f"Model '{MODEL_NAME}' not found in Ollama. "
165
+ # f"Make sure it’s pulled at start. Error: {e}")
166
+ # raise
167
+
168
+
169
+
170
+ # app.py
171
  import os
 
172
  import logging
173
+ from typing import Optional, Literal, Dict, Any
174
 
175
+ from fastapi import FastAPI, HTTPException, Header
176
  from fastapi.responses import JSONResponse
177
  from fastapi.middleware.cors import CORSMiddleware
178
  from pydantic import BaseModel, Field, ConfigDict
179
+ import httpx
 
 
 
 
 
 
180
 
181
# ---------- Config ----------
# Module-level setup: logging and the path to the Markdown profile that
# grounds every answer the agent gives.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("voice-agent")

# Profile lives alongside the app code; loaded once at import time below.
PROFILE_MD_PATH = os.path.join("Data", "profile_data.md")
186
 
187
  def load_profile_md() -> str:
188
  if os.path.exists(PROFILE_MD_PATH):
 
190
  return f.read()
191
  return ""
192
 
 
 
 
 
193
# Read once at startup; restart the server to pick up profile edits.
PROFILE_MD = load_profile_md()
 
 
 
 
 
194
 
195
def build_prompt(question: str) -> str:
    """Single-message prompt so it works reliably across providers.

    Inlines the full Markdown profile (module-level PROFILE_MD) plus the
    user question into one string; no separate system message is used, so
    the same prompt works for both Gemini and Hugging Face models.
    """
    # NOTE: the template is flush-left on purpose — the model receives these
    # exact bytes; .strip() only trims the leading/trailing blank lines.
    return f"""
You are Mohit Gupta's AI voice twin, built to assist in interviews and Q&A sessions.
Answer truthfully, factually, and in a friendly but professional tone using ONLY the context provided.

Guidelines:
- Be concise but include specifics when relevant (projects, metrics, tech).
- Combine related details naturally.
- Do NOT invent facts outside the context.
- Speak in first person (“I have worked on…”).

### Context (Markdown)
{PROFILE_MD}

### Task
Answer the question using ONLY the context above.

### Question
{question}

### Answer
""".strip()
218
+
219
+ # ---------- Provider Clients ----------
220
+ # We prefer Gemini by default. If user chooses Hugging Face, we call HF Inference API for the specified model.
221
+
222
async def call_gemini(
    api_key: str,
    model: str,
    prompt: str,
    generation_config: Optional[Dict[str, Any]] = None
) -> str:
    """Generate text with Google Gemini.

    Prefers the official ``google-generativeai`` SDK when installed; falls
    back to the public REST endpoint otherwise. The API key is never logged.

    Args:
        api_key: Gemini API key (per-request; not persisted).
        model: Model name, e.g. ``gemini-1.5-flash``.
        prompt: Fully-built single-message prompt.
        generation_config: Optional overrides; defaults to a low-temperature,
            512-token config.

    Returns:
        The generated text, stripped of surrounding whitespace.

    Raises:
        HTTPException: 502 on provider/transport failures, or the upstream
            HTTP status when Gemini returns a non-200 response.
    """
    generation_config = generation_config or {"temperature": 0.2, "max_output_tokens": 512}

    try:
        # Prefer python SDK (google-generativeai) when available.
        import google.generativeai as genai  # type: ignore
    except ModuleNotFoundError:
        genai = None

    if genai is not None:
        try:
            genai.configure(api_key=api_key)
            gm = genai.GenerativeModel(model)
            resp = gm.generate_content(prompt, generation_config=generation_config)
            # SDK exposes .text on success; it may be empty on safety blocks.
            text = getattr(resp, "text", None) or ""
            if not text:
                raise HTTPException(502, "Gemini returned empty response.")
            return text.strip()
        except HTTPException:
            raise
        except Exception as e:
            # BUGFIX: SDK failures (bad key, quota, network) previously escaped
            # as raw exceptions -> opaque 500s. Surface them as a clean 502.
            raise HTTPException(502, f"Gemini SDK error: {e}") from e

    # REST fallback (model naming differs, e.g. "models/gemini-1.5-flash");
    # try both forms automatically.
    model_names = [model, f"models/{model}"]
    last_err: Optional[Exception] = None
    for m in model_names:
        url = f"https://generativelanguage.googleapis.com/v1beta/{m}:generateContent"
        payload = {
            "contents": [{"parts": [{"text": prompt}]}],
            "generationConfig": generation_config,
        }
        headers = {"x-goog-api-key": api_key}
        try:
            async with httpx.AsyncClient(timeout=60) as client:
                r = await client.post(url, json=payload, headers=headers)
            if r.status_code == 200:
                data = r.json()
                # Extract the first candidate's concatenated text parts.
                candidates = (data.get("candidates") or [])
                if not candidates:
                    raise HTTPException(502, f"Gemini returned no candidates: {data}")
                parts = candidates[0].get("content", {}).get("parts", [])
                text = "".join(p.get("text", "") for p in parts).strip()
                if not text:
                    raise HTTPException(502, "Gemini returned empty text.")
                return text
            last_err = HTTPException(r.status_code, f"Gemini error: {r.text}")
        except Exception as e:
            # Remember the failure and try the alternate model-name form.
            last_err = e

    # All attempts failed — surface the most recent failure.
    if isinstance(last_err, HTTPException):
        raise last_err
    if last_err is not None:
        # BUGFIX: transport errors (timeouts, DNS) used to be re-raised raw,
        # producing unhandled 500s instead of a descriptive gateway error.
        raise HTTPException(502, f"Gemini request failed: {last_err}") from last_err
    raise HTTPException(502, "Gemini request failed")
278
+
279
async def call_huggingface_inference(
    hf_api_key: str,
    model: str,
    prompt: str,
    parameters: Optional[Dict[str, Any]] = None
) -> str:
    """Run a text-generation request against the Hugging Face Inference API.

    Intended for generation models such as ``google/gemma-3-27b-it``.
    Raises ``HTTPException`` mirroring the upstream status on failure.
    """
    parameters = parameters or {
        "max_new_tokens": 512,
        "temperature": 0.2,
        "return_full_text": False,
        "repetition_penalty": 1.1,
    }

    endpoint = f"https://api-inference.huggingface.co/models/{model}"
    auth_headers = {"Authorization": f"Bearer {hf_api_key}"}
    request_body = {"inputs": prompt, "parameters": parameters}

    async with httpx.AsyncClient(timeout=120) as client:
        resp = await client.post(endpoint, headers=auth_headers, json=request_body)

    if resp.status_code == 503:
        # Cold model: the Inference API spins it up on first request.
        raise HTTPException(503, "Hugging Face model is loading. Please retry.")
    if resp.status_code != 200:
        raise HTTPException(resp.status_code, f"Hugging Face error: {resp.text}")

    data = resp.json()
    # Response shape varies by pipeline: list of dicts, bare dict, or string.
    if isinstance(data, list) and data and "generated_text" in data[0]:
        return data[0]["generated_text"].strip()
    if isinstance(data, dict) and "generated_text" in data:
        return data["generated_text"].strip()
    if isinstance(data, str):
        return data.strip()
    raise HTTPException(502, f"Unexpected HF response format: {data}")
319
+
320
# ---------- FastAPI ----------
app = FastAPI(title="Voice Agent API", version="0.2.0")
# CORS is wide open so any frontend can call the API during development.
# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers for credentialed requests — pin origins for prod.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # tighten for prod
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
329
 
330
class ChatIn(BaseModel):
    """Request body for /api/chat (also reused by /api/debug/prompt)."""
    question: str = Field(..., examples=["Summarize my projects briefly."])
    session_id: Optional[str] = Field(None, examples=["demo-1"])
    # Which provider to use — default Gemini
    provider: Optional[Literal["gemini", "huggingface"]] = "gemini"
    # Optional: model override per provider
    model: Optional[str] = Field(
        None,
        examples=["gemini-1.5-flash", "google/gemma-3-27b-it"]
    )
    # Per-request API keys (frontend supplies these)
    # NOTE(review): keys in the JSON body can end up in request/debug logs —
    # prefer the header variants; confirm logging config before shipping.
    gemini_api_key: Optional[str] = None
    hf_api_key: Optional[str] = None

    model_config = ConfigDict(json_schema_extra={
        "examples": [{
            "question": "Give me a one-line intro about me.",
            "provider": "gemini",
            "model": "gemini-1.5-flash",
            "gemini_api_key": "YOUR_GEMINI_KEY"
        }]
    })
352
 
353
class ChatOut(BaseModel):
    """Response body: the model's answer text."""
    answer: str
355
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  @app.get("/")
357
  def root():
358
+ return JSONResponse({"ok": True, "message": "Voice Agent API (Gemini / Hugging Face)"})
359
 
360
  @app.get("/api/health")
361
  def health():
362
+ # No external calls here — just server status & profile presence.
363
+ return {
364
+ "ok": True,
365
+ "profile_loaded": bool(PROFILE_MD),
366
+ "default_context_chars": len(PROFILE_MD),
367
+ "providers": {
368
+ "gemini": "supported",
369
+ "huggingface": "supported"
370
+ }
371
+ }
372
+
373
+ @app.post("/api/chat", response_model=ChatOut, tags=["Chat"], summary="Ask the agent")
374
+ async def chat(
375
+ payload: ChatIn,
376
+ # optional: accept keys via headers (frontend can send them this way instead of JSON)
377
+ x_gemini_api_key: Optional[str] = Header(None),
378
+ x_hf_api_key: Optional[str] = Header(None),
379
+ authorization: Optional[str] = Header(None), # e.g. "Bearer hf_xxx"
380
+ ):
381
+ question = payload.question.strip()
382
+ if not question:
383
+ raise HTTPException(400, "Question is required.")
384
+
385
+ prompt = build_prompt(question)
386
+
387
+ provider = payload.provider or "gemini"
388
+ if provider == "gemini":
389
+ model = payload.model or os.getenv("DEFAULT_GEMINI_MODEL", "gemini-1.5-flash")
390
+ # choose key from body > header > env
391
+ gemini_key = payload.gemini_api_key or x_gemini_api_key or os.getenv("GEMINI_API_KEY")
392
+ if not gemini_key:
393
+ raise HTTPException(400, "Gemini API key is required (send gemini_api_key or X-Gemini-Api-Key).")
394
+ text = await call_gemini(gemini_key, model, prompt)
395
+ return ChatOut(answer=text or "Sorry, I didn't catch that.")
396
+
397
+ elif provider == "huggingface":
398
+ model = payload.model or os.getenv("DEFAULT_HF_MODEL", "google/gemma-3-27b-it")
399
+ # choose key from body > header (X-Hf-Api-Key) > Authorization Bearer > env
400
+ hf_key = payload.hf_api_key or x_hf_api_key
401
+ if not hf_key and authorization and authorization.lower().startswith("bearer "):
402
+ hf_key = authorization.split(" ", 1)[1].strip()
403
+ if not hf_key:
404
+ hf_key = os.getenv("HF_API_KEY")
405
+ if not hf_key:
406
+ raise HTTPException(400, "Hugging Face API key is required (send hf_api_key, X-Hf-Api-Key, or Authorization: Bearer).")
407
+ text = await call_huggingface_inference(hf_key, model, prompt)
408
+ return ChatOut(answer=text or "Sorry, I didn't catch that.")
409
+
410
+ else:
411
+ raise HTTPException(400, f"Unknown provider: {provider}")
412
+
413
# Optional: peek at the exact prompt we send (for debugging)
@app.post("/api/debug/prompt")
def debug_prompt(payload: ChatIn):
    """Return the length and a 1200-char preview of the prompt built for *payload*."""
    prompt = build_prompt(payload.question or "")
    preview = prompt[:1200]
    if len(prompt) > 1200:
        preview += "…[truncated]"
    return {"length": len(prompt), "preview": preview}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  fastapi
2
  uvicorn
3
- ollama
4
  pydantic
 
 
1
  fastapi
2
  uvicorn
3
+ httpx
4
  pydantic
5
+ google-generativeai
start.sh DELETED
@@ -1,42 +0,0 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
-
4
- echo "HOME=${HOME}"
5
- echo "PATH=${PATH}"
6
- echo "OLLAMA_MODELS=${OLLAMA_MODELS:-<not set>}"
7
-
8
- # ✅ Force a safe, writable models dir if it's wrong or unset
9
- if [ -z "${OLLAMA_MODELS:-}" ] || [ "${OLLAMA_MODELS}" = "/.ollama" ]; then
10
- export OLLAMA_MODELS="/home/appuser/.ollama"
11
- fi
12
-
13
- mkdir -p "${OLLAMA_MODELS}"
14
- echo "Using OLLAMA_MODELS=${OLLAMA_MODELS}"
15
- ls -ld "${OLLAMA_MODELS}"
16
-
17
- echo "Starting ollama serve..."
18
- ollama serve &
19
-
20
- echo -n "Waiting for Ollama"
21
- for i in $(seq 1 60); do
22
- if curl -s http://127.0.0.1:11434/api/tags >/dev/null; then
23
- echo " - ready"
24
- break
25
- fi
26
- echo -n "."
27
- sleep 1
28
- if [ "$i" -eq 60 ]; then
29
- echo "Failed to start Ollama in time"; exit 1
30
- fi
31
- done
32
-
33
- # MODEL_TAG="${MODEL_NAME:-llama3.2:3b-instruct-q4_K_M}"
34
- # MODEL_TAG="${MODEL_NAME:-mistral:instruct}"
35
- MODEL_TAG="${MODEL_NAME:-smallthinker:latest}"
36
- if ! ollama list | grep -q "$MODEL_TAG"; then
37
- echo "Pulling model: $MODEL_TAG"
38
- ollama pull "$MODEL_TAG"
39
- fi
40
-
41
- echo "Starting FastAPI on :7860"
42
- exec uvicorn app:app --host 0.0.0.0 --port 7860 --workers 1