odai0 committed
Commit b4a4eae · 1 Parent(s): e128207
Files changed (5)
  1. .gitignore +2 -0
  2. Dockerfile +17 -0
  3. README copy.md +10 -0
  4. app.py +86 -0
  5. requirements.txt +6 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+ plan.txt
+ .env
Dockerfile ADDED
@@ -0,0 +1,17 @@
+ FROM python:3.10-slim
+
+ # Install system deps
+ RUN apt-get update && apt-get install -y git build-essential && rm -rf /var/lib/apt/lists/*
+
+ WORKDIR /app
+ COPY . /app
+
+ # Install Python deps
+ RUN pip install --upgrade pip
+ RUN pip install --prefer-binary llama-cpp-python==0.2.90 fastapi uvicorn huggingface-hub
+
+
+ # Expose FastAPI port
+ EXPOSE 7860
+
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README copy.md ADDED
@@ -0,0 +1,10 @@
+ ---
+ title: Silma
+ emoji: 🦀
+ colorFrom: indigo
+ colorTo: purple
+ sdk: docker
+ pinned: false
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,86 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from fastapi.middleware.cors import CORSMiddleware
+ from llama_cpp import Llama
+ import os
+ import json
+
+ app = FastAPI()
+ MODE = os.environ.get("MODE", "LLM")
+
+ class MockLLM:
+     def create_chat_completion(self, messages, max_tokens=512, temperature=0):
+         return {
+             "choices": [{
+                 "message": {"content": f"[MOCKED RESPONSE] This is a reply"}
+             }]
+         }
+
+ print(f"Running in {MODE} mode")
+
+ if MODE == "MOCK":
+     llm = MockLLM()
+ else:
+     llm = Llama.from_pretrained(
+         repo_id="bartowski/SILMA-9B-Instruct-v1.0-GGUF",
+         filename="SILMA-9B-Instruct-v1.0-Q5_K_M.gguf",
+     )
+
+ class PromptRequest(BaseModel):
+     prompt: str
+
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+
+ @app.get("/")
+ def api_home():
+     return {'detail': 'Welcome to FastAPI TextGen Tutorial!'}
+
+
+ @app.post("/prompt")
+ def generate_text(request: PromptRequest):
+     output = llm.create_chat_completion(
+         messages=[
+             {
+                 "role": "system",
+                 "content": (
+                     "You are an assistant for an accessibility browser extension. "
+                     "Your only task is to return a **valid JSON object** based on the user's request. "
+                     "The JSON must have this format:\n\n"
+                     "{ \"signal\": string, \"message\": string }\n\n"
+                     "Valid signal codes:\n"
+                     "- \"m0\": regular reply\n"
+                     "- \"a0\": request site chunking for analysis\n\n"
+                     "Rules:\n"
+                     "1. Always return JSON, never plain text or explanations.\n"
+                     "2. Do not include extra keys.\n"
+                     "3. Do not escape JSON unnecessarily.\n"
+                     "4. Request chunking using valid signal if user asks for analysis, summarization, or possible actions.\n"
+                     "5. If unsure, default to {\"signal\": \"m0\", \"message\": \"I did not understand the request.\"}"
+                 )
+             },
+             {"role": "user", "content": request.prompt}
+         ],
+         max_tokens=512,
+         temperature=0
+     )
+
+     output_str = output["choices"][0]["message"]["content"]
+     try:
+         output_json = json.loads(output_str)
+     except json.JSONDecodeError:
+         output_json = {"signal": "m0", "message": output_str}
+
+     return {"output": output_json}
+
+
+ if __name__ == "__main__" and MODE == "MOCK":
+     import uvicorn
+     uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ fastapi
+ uvicorn[standard]
+ transformers
+ torch
+ accelerate
+ #llama-cpp-python