# Residue from the Hugging Face Spaces file viewer (not part of the program):
# Spaces: Build error | File size: 3,945 Bytes
import time
import json
import re
import random
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# ASGI application object; the route decorators in this file attach to it.
app = FastAPI(title="Sinhala Mithuru HF GGUF")

# CORS is configured with wildcard values for origins, methods and headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# Download the GGUF file from the model repository.
# This happens only when the Space is first started up.
print("⏳ Downloading model from Hub... This might take a few minutes.")
MODEL_PATH = hf_hub_download(
    repo_id="TD-jayadeera/sinhala-llama3-gguf",
    filename="llama3-sinhala.Q4_K_M.gguf",
)

# Load the downloaded file once at import time; both endpoints share this
# single `llm` instance.
print("🚀 Loading GGUF Model on CPU...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,
    n_threads=4,
    chat_format="llama-3",
)
print("✅ GGUF Model Ready.")
class StoryRequest(BaseModel):
    """Request body for the ``/generate_story`` endpoint."""

    # Difficulty level; the story endpoint compares it (after stripping
    # whitespace) against "සරල" to choose the prompt and temperature.
    level: str
    # Story theme, interpolated into the user prompt.
    theme: str
    # Extra context, interpolated into the user prompt.
    context: str
class QuizRequest(BaseModel):
    """Request body for the ``/generate_quiz`` endpoint."""

    # Story text the quiz questions are generated from.
    story: str
    # Difficulty level; "සරල" selects the smaller question and option counts.
    level: str
@app.post("/generate_story")
async def api_generate_story(req: StoryRequest):
    """Generate a short Sinhala story for the requested level and theme.

    The level "සරල" selects the Grade 1-2 spoken-Sinhala instruction with a
    temperature of 0.3; any other level selects the Grade 3-5 written-Sinhala
    instruction with a temperature of 0.4.

    Returns:
        A dict with ``"story"`` (the stripped model reply) and ``"time"``
        (elapsed seconds, rounded to two decimals).
    """
    started_at = time.time()
    safe_level = (req.level or "").strip()

    if safe_level == "සරල":
        instruction, temperature = (
            "You are an expert primary school teacher in Sri Lanka. Write a simple Spoken Sinhala story for Grade 1-2 children. Exactly 6 sentences.",
            0.3,
        )
    else:
        instruction, temperature = (
            "You are an expert primary school teacher in Sri Lanka. Write a formal Written Sinhala story for Grade 3-5 children. Exactly 7 sentences.",
            0.4,
        )

    prompt = f"මට්ටම: {safe_level} | තේමාව: {req.theme} | සන්දර්භය: {req.context}"
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant that only speaks Sinhala."},
        {"role": "user", "content": f"{instruction}\n\n{prompt}"},
    ]

    completion = llm.create_chat_completion(
        messages=chat_messages,
        temperature=temperature,
        max_tokens=1000,
    )
    first_choice = completion["choices"][0]
    story_text = first_choice["message"]["content"].strip()

    return {"story": story_text, "time": round(time.time() - started_at, 2)}
def _parse_quiz_items(decoded):
    """Parse the model reply into a list of candidate question objects.

    The outermost ``[...]`` span is tried first because the reply may wrap
    the JSON array in extra text; without brackets the whole reply is parsed.
    A single JSON object is treated as a one-question quiz.

    Returns:
        A list of parsed items, or an empty list when the reply is not valid
        JSON or is neither an array nor an object.
    """
    match = re.search(r'\[.*\]', decoded, re.DOTALL)
    try:
        parsed = json.loads(match.group() if match else decoded)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError subclass.
        return []
    if isinstance(parsed, dict):
        return [parsed]
    return parsed if isinstance(parsed, list) else []


def _build_mcq(item, option_count):
    """Normalise one parsed question into the response shape.

    Options are truncated or padded with "වෙනත්" to exactly ``option_count``
    entries, the answer is forced into the list if absent, and the list is
    shuffled.

    Returns:
        A dict with ``question``, ``options`` and ``correct_answer`` (index
        of the answer in ``options``), or ``None`` when ``item`` is not an
        object, has no answer, or has a non-list ``options`` value.
    """
    if not isinstance(item, dict):
        return None
    answer = item.get("answer")
    if answer is None:
        # Without an answer there is nothing valid to mark as correct.
        return None
    raw_options = item.get("options", [])
    if not isinstance(raw_options, list):
        return None

    # Work on a copy so the parsed JSON is not mutated.
    options = raw_options[:option_count]
    options += ["වෙනත්"] * (option_count - len(options))
    if answer not in options:
        options[0] = answer
    random.shuffle(options)

    return {
        "question": item.get("question", ""),
        "options": options,
        "correct_answer": options.index(answer),
    }


@app.post("/generate_quiz")
async def api_generate_quiz(req: QuizRequest):
    """Generate multiple-choice questions in Sinhala for a story.

    Level "සරල" asks the model for 1 question with 3 options each; any other
    level asks for 2 questions with 4 options each. Unusable questions in the
    model reply are skipped individually.

    Returns:
        A list of dicts with ``question``, ``options`` and ``correct_answer``.
        When the reply yields no usable question, a fixed single-question
        fallback quiz is returned instead.
    """
    safe_level = (req.level or "").strip()
    is_simple = safe_level == "සරල"
    target_opt_count = 3 if is_simple else 4
    question_count = "1 question" if is_simple else "2 questions"
    instruction = f"Generate exactly {question_count} in Sinhala based on the story. Output strictly as a JSON array."

    response = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": "Output strictly valid JSON."},
            {"role": "user", "content": f"{instruction}\n\nStory: {req.story}"},
        ],
        temperature=0.1,
    )
    decoded = response["choices"][0]["message"]["content"].strip()

    candidates = (_build_mcq(item, target_opt_count) for item in _parse_quiz_items(decoded))
    final_mcq = [mcq for mcq in candidates if mcq is not None]
    if final_mcq:
        return final_mcq

    # Best-effort fallback so the client always receives a well-formed quiz.
    return [{"question": "කතාව කියවා අවසන් ද?", "options": ["ඔව්", "නැහැ", "මතක නැහැ"], "correct_answer": 0}]