initial commit
Browse files- .gitignore +53 -0
- Dockerfile +25 -0
- app.py +70 -0
- requirements.txt +3 -0
.gitignore
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# Virtual environments
|
| 30 |
+
venv/
|
| 31 |
+
ENV/
|
| 32 |
+
env/
|
| 33 |
+
.venv/
|
| 34 |
+
.env/
|
| 35 |
+
|
| 36 |
+
# Pytest cache
|
| 37 |
+
.pytest_cache/
|
| 38 |
+
|
| 39 |
+
# VS Code settings
|
| 40 |
+
.vscode/
|
| 41 |
+
|
| 42 |
+
# IDE settings
|
| 43 |
+
.idea/
|
| 44 |
+
|
| 45 |
+
# Logs
|
| 46 |
+
*.log
|
| 47 |
+
|
| 48 |
+
# OS files
|
| 49 |
+
.DS_Store
|
| 50 |
+
Thumbs.db
|
| 51 |
+
|
| 52 |
+
# Environment variables
|
| 53 |
+
.env
|
Dockerfile
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

# Install Ollama while still root — the install script writes to system
# paths (/usr/local, systemd) and fails for an unprivileged user.
RUN wget -qO- https://ollama.com/install.sh | sh

# Create unprivileged user (Spaces convention: uid 1000)
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"
# Keep model blobs inside the user's writable home directory.
ENV OLLAMA_MODELS="/home/user/.ollama/models"

WORKDIR /app

# Install Python dependencies first so this layer is cached
# independently of application-code changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy application code
COPY --chown=user . /app

# `RUN ollama pull` at build time would fail: the pull needs a running
# Ollama server, which does not exist during the image build. Instead,
# start the server at container start, pull the model once it is up,
# then launch the API on the Spaces port (7860).
CMD ["/bin/sh", "-c", "ollama serve & sleep 5 && ollama pull gpt-oss:20b && uvicorn app:app --host 0.0.0.0 --port 7860"]
app.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Standard library
import json

# Third-party
import httpx
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.staticfiles import StaticFiles

# FastAPI application instance; served by uvicorn (see Dockerfile CMD).
app = FastAPI()
# Serve chat.html at root
@app.get("/")
async def chat_page():
    """Serve the static chat UI page."""
    return FileResponse("templates/chat.html")
# Streaming chat endpoint: proxies the prompt to a local Ollama server
# and relays the model's reply as a plain-text stream.
@app.post("/stream_chat")
async def stream_chat(request: Request):
    """Stream a chat completion from the local Ollama server.

    Expects a JSON body like ``{"prompt": "..."}`` and returns a
    ``StreamingResponse`` yielding the model's reply as plain-text
    chunks. Errors talking to Ollama are reported in-band as a chunk
    beginning with ``[error]`` (the HTTP stream has already started,
    so a status code can no longer be changed).

    Raises:
        HTTPException: 400 if the body has no non-empty 'prompt'.
    """
    data = await request.json()
    prompt = data.get("prompt")
    if not prompt:
        raise HTTPException(status_code=400, detail="Missing 'prompt'")

    # Must match the model pulled in the Dockerfile.
    model = "gpt-oss:20b"

    async def event_generator():
        url = "http://localhost:11434/api/chat"
        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": "You are a thoughtful assistant."},
                {"role": "user", "content": prompt}
            ],
            "stream": True,
            "options": {
                "num_predict": 256,
                "num_ctx": 4096
            }
        }

        try:
            async with httpx.AsyncClient() as client:
                async with client.stream("POST", url, json=payload, timeout=None) as resp:
                    resp.raise_for_status()

                    # Ollama streams one JSON object per line.
                    async for line in resp.aiter_lines():
                        if not line or not line.strip():
                            continue
                        try:
                            chunk = json.loads(line)
                        except json.JSONDecodeError:
                            # Skip malformed/partial lines rather than
                            # aborting the whole stream.
                            continue
                        content = chunk.get("message", {}).get("content", "")
                        if content:
                            yield content

        except httpx.HTTPStatusError as e:
            # The body of a streamed httpx response is not read
            # automatically; accessing .text here would raise
            # ResponseNotRead. Read it explicitly first.
            body = await e.response.aread()
            error_msg = (
                f"Ollama API returned error: {e.response.status_code} - "
                f"{body.decode('utf-8', errors='replace')}"
            )
            yield f"[error]\n{error_msg}"
        except httpx.RequestError as e:
            # e.g. connection refused when the Ollama server is not running.
            yield f"[error]\nCould not reach Ollama: {e}"

    return StreamingResponse(
        event_generator(),
        media_type="text/plain"
    )
# For Hugging Face Spaces compatibility: allow running the app directly
# with `python app.py` as well as via the Dockerfile's uvicorn CMD.
if __name__ == "__main__":
    import uvicorn

    # Spaces expects the server on port 7860, bound to all interfaces.
    uvicorn.run(app, host="0.0.0.0", port=7860)
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
httpx
|
| 2 |
+
fastapi
|
| 3 |
+
uvicorn[standard]
|