LLM_test / app.py
TD-jayadeera's picture
Update app.py
5976c7c verified
import time
import json
import re
import random
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# FastAPI application object; the route decorators further down register on it.
app = FastAPI(title="Sinhala Mithuru HF GGUF")

# CORS is left fully open: every origin, HTTP method and request header is
# accepted.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
# Download the GGUF file from the model repository.
# This runs at module import, i.e. when the Space first starts up.
print("⏳ Downloading model from Hub... This might take a few minutes.")
MODEL_PATH = hf_hub_download(
    filename="llama3-sinhala.Q4_K_M.gguf",
    repo_id="TD-jayadeera/sinhala-llama3-gguf",
)

# Load the downloaded file into a single module-level Llama instance that both
# endpoints share.
print("🚀 Loading GGUF Model on CPU...")
llm = Llama(
    chat_format="llama-3",
    n_threads=4,
    n_ctx=4096,
    model_path=MODEL_PATH,
)
print("✅ GGUF Model Ready.")
# Request body for POST /generate_story.
class StoryRequest(BaseModel):
    # Difficulty level; the handler strips it and compares it to "සරල".
    level: str
    # Story theme, interpolated into the user prompt.
    theme: str
    # Additional context, interpolated into the user prompt.
    context: str
# Request body for POST /generate_quiz.
class QuizRequest(BaseModel):
    # Story text the questions are generated from.
    story: str
    # Difficulty level; the handler strips it and compares it to "සරල".
    level: str
# POST /generate_story
#
# Builds a prompt from the requested level, theme and context, asks the shared
# model for a story and returns {"story": <text>, "time": <seconds>}, where
# "time" is the wall-clock duration of the request rounded to two decimals.
#
# NOTE(review): the completion call is synchronous and is not awaited inside
# this async handler; presumably other requests wait while it runs - confirm
# this is intended.
@app.post("/generate_story")
async def api_generate_story(req: StoryRequest):
    started_at = time.time()
    level = (req.level or "").strip()

    # (instruction, sampling temperature) for each of the two supported styles.
    simple_style = (
        "You are an expert primary school teacher in Sri Lanka. Write a simple Spoken Sinhala story for Grade 1-2 children. Exactly 6 sentences.",
        0.3,
    )
    formal_style = (
        "You are an expert primary school teacher in Sri Lanka. Write a formal Written Sinhala story for Grade 3-5 children. Exactly 7 sentences.",
        0.4,
    )
    # Only the exact level "සරල" selects the simple style; anything else is formal.
    instruction, temp = simple_style if level == "සරල" else formal_style

    prompt = f"මට්ටම: {level} | තේමාව: {req.theme} | සන්දර්භය: {req.context}"
    chat = [
        {"role": "system", "content": "You are a helpful assistant that only speaks Sinhala."},
        {"role": "user", "content": instruction + "\n\n" + prompt},
    ]

    completion = llm.create_chat_completion(
        messages=chat,
        temperature=temp,
        max_tokens=1000,
    )
    first_choice = completion["choices"][0]
    story_text = first_choice["message"]["content"].strip()

    elapsed = round(time.time() - started_at, 2)
    return {"story": story_text, "time": elapsed}
# Filler choice used to pad a question that arrives with too few options.
_FILLER_OPTION = "වෙනත්"


def _fallback_quiz():
    """Return the generic one-question quiz used when no usable quiz was parsed."""
    return [{
        "question": "කතාව කියවා අවසන් ද?",
        "options": ["ඔව්", "නැහැ", "මතක නැහැ"],
        "correct_answer": 0,
    }]


def _extract_raw_questions(decoded):
    """Pull the question objects out of the model's reply text.

    Tries the outermost ``[...]`` span first (the model may wrap the array in
    extra text), then the whole reply. A single JSON object is accepted as a
    one-element quiz. Items that are not JSON objects are dropped.

    Returns a possibly empty list of dicts; never raises on malformed JSON.
    """
    candidates = []
    match = re.search(r'\[.*\]', decoded, re.DOTALL)
    if match:
        candidates.append(match.group())
    candidates.append(decoded)

    for text in candidates:
        try:
            parsed = json.loads(text)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError subclass; try the next candidate.
            continue
        if isinstance(parsed, dict):
            parsed = [parsed]
        if not isinstance(parsed, list):
            continue
        questions = [item for item in parsed if isinstance(item, dict)]
        if questions:
            return questions
    return []


def _build_question(raw, target_opt_count):
    """Normalise one raw question dict into the response shape.

    The options are cut or padded to exactly ``target_opt_count`` entries, the
    answer is guaranteed to be one of them, and the list is shuffled.

    Returns ``None`` when ``raw`` has no answer, because without one there is
    no correct option to point at.
    """
    answer = raw.get("answer")
    if answer is None:
        return None

    options = raw.get("options")
    options = list(options[:target_opt_count]) if isinstance(options, list) else []
    options.extend([_FILLER_OPTION] * (target_opt_count - len(options)))

    # The answer may be absent from the model's options or cut off by the
    # truncation above; put it back so the question stays answerable.
    if answer not in options:
        options[0] = answer

    random.shuffle(options)
    return {
        "question": raw.get("question", ""),
        "options": options,
        "correct_answer": options.index(answer),
    }


# POST /generate_quiz
#
# Asks the shared model for questions about the given story (1 question with
# 3 options for level "සරල", otherwise 2 questions with 4 options) and returns
# a list of {"question", "options", "correct_answer"} dicts, where
# "correct_answer" is the index of the right option after shuffling.
#
# When the reply yields no usable question (invalid JSON, an empty array, or
# only items without an answer), a fixed generic quiz is returned instead.
# Errors raised by the model call itself are not caught here.
@app.post("/generate_quiz")
async def api_generate_quiz(req: QuizRequest):
    safe_level = (req.level or "").strip()
    is_simple = safe_level == "සරල"
    target_opt_count = 3 if is_simple else 4
    question_spec = "1 question" if is_simple else "2 questions"
    instruction = f"Generate exactly {question_spec} in Sinhala based on the story. Output strictly as a JSON array."

    response = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": "Output strictly valid JSON."},
            {"role": "user", "content": f"{instruction}\n\nStory: {req.story}"},
        ],
        temperature=0.1,
    )
    # Treat a missing message content as an empty reply rather than crashing.
    decoded = (response["choices"][0]["message"]["content"] or "").strip()

    final_mcq = []
    for raw in _extract_raw_questions(decoded):
        built = _build_question(raw, target_opt_count)
        if built is not None:
            final_mcq.append(built)

    return final_mcq or _fallback_quiz()