odai0 committed
Commit b4a4eae · 1 Parent(s): e128207
Files changed (5)
  1. .gitignore +2 -0
  2. Dockerfile +17 -0
  3. README copy.md +10 -0
  4. app.py +86 -0
  5. requirements.txt +6 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+ plan.txt
+ .env
Dockerfile ADDED
@@ -0,0 +1,17 @@
+ FROM python:3.10-slim
+
+ # Install system deps
+ RUN apt-get update && apt-get install -y git build-essential && rm -rf /var/lib/apt/lists/*
+
+ WORKDIR /app
+ COPY . /app
+
+ # Install Python deps
+ RUN pip install --upgrade pip
+ RUN pip install --prefer-binary llama-cpp-python==0.2.90 fastapi uvicorn huggingface-hub
+
+
+ # Expose FastAPI port
+ EXPOSE 7860
+
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README copy.md ADDED
@@ -0,0 +1,10 @@
+ ---
+ title: Silma
+ emoji: 🦀
+ colorFrom: indigo
+ colorTo: purple
+ sdk: docker
+ pinned: false
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,86 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from fastapi.middleware.cors import CORSMiddleware
+ from llama_cpp import Llama
+ import os
+ import json
+
+ app = FastAPI()
+ MODE = os.environ.get("MODE", "LLM")
+
+ class MockLLM:
+     def create_chat_completion(self, messages, max_tokens=512, temperature=0):
+         return {
+             "choices": [{
+                 "message": {"content": f"[MOCKED RESPONSE] This is a reply"}
+             }]
+         }
+
+ print(f"Running in {MODE} mode")
+
+ if MODE == "MOCK":
+     llm = MockLLM()
+ else:
+     llm = Llama.from_pretrained(
+         repo_id="bartowski/SILMA-9B-Instruct-v1.0-GGUF",
+         filename="SILMA-9B-Instruct-v1.0-Q5_K_M.gguf",
+     )
+
+ class PromptRequest(BaseModel):
+     prompt: str
+
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+
+ @app.get("/")
+ def api_home():
+     return {'detail': 'Welcome to FastAPI TextGen Tutorial!'}
+
+
+ @app.post("/prompt")
+ def generate_text(request: PromptRequest):
+     output = llm.create_chat_completion(
+         messages=[
+             {
+                 "role": "system",
+                 "content": (
+                     "You are an assistant for an accessibility browser extension. "
+                     "Your only task is to return a **valid JSON object** based on the user's request. "
+                     "The JSON must have this format:\n\n"
+                     "{ \"signal\": string, \"message\": string }\n\n"
+                     "Valid signal codes:\n"
+                     "- \"m0\": regular reply\n"
+                     "- \"a0\": request site chunking for analysis\n\n"
+                     "Rules:\n"
+                     "1. Always return JSON, never plain text or explanations.\n"
+                     "2. Do not include extra keys.\n"
+                     "3. Do not escape JSON unnecessarily.\n"
+                     "4. Request chunking using valid signal if user asks for analysis, summarization, or possible actions.\n"
+                     "5. If unsure, default to {\"signal\": \"m0\", \"message\": \"I did not understand the request.\"}"
+                 )
+             },
+             {"role": "user", "content": request.prompt}
+         ],
+         max_tokens=512,
+         temperature=0
+     )
+
+     output_str = output["choices"][0]["message"]["content"]
+     try:
+         output_json = json.loads(output_str)
+     except json.JSONDecodeError:
+         output_json = {"signal": "m0", "message": output_str}
+
+     return {"output": output_json}
+
+
+ if __name__ == "__main__" and MODE == "MOCK":
+     import uvicorn
+     uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ fastapi
+ uvicorn[standard]
+ transformers
+ torch
+ accelerate
+ #llama-cpp-python