"""FastAPI service that generates Sinhala stories and quizzes with a GGUF Llama model.

The model file is downloaded from the Hugging Face Hub and loaded on CPU at
import time; two POST endpoints then use it:

* ``/generate_story`` -- writes a short Sinhala story for a given level/theme.
* ``/generate_quiz``  -- builds multiple-choice questions from a story.
"""

import json
import logging
import random
import re
import time

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from pydantic import BaseModel

logger = logging.getLogger(__name__)

app = FastAPI(title="Sinhala Mithuru HF GGUF")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Download the GGUF file from the model repository.
# This runs at import time, i.e. when the Space first starts up.
print("⏳ Downloading model from Hub... This might take a few minutes.")
MODEL_PATH = hf_hub_download(
    repo_id="TD-jayadeera/sinhala-llama3-gguf",
    filename="llama3-sinhala.Q4_K_M.gguf",
)

print("🚀 Loading GGUF Model on CPU...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,
    n_threads=4,
    chat_format="llama-3",
)
print("✅ GGUF Model Ready.")

# Level value that selects the simpler story/quiz variant.
_SIMPLE_LEVEL = "සරල"
# Placeholder used to pad a question that came back with too few options.
_FILLER_OPTION = "වෙනත්"
# Greedy on purpose: spans from the first "[" to the last "]" so that nested
# arrays (e.g. each question's "options") stay inside the match.
_JSON_ARRAY_RE = re.compile(r'\[.*\]', re.DOTALL)


class StoryRequest(BaseModel):
    """Request body for ``/generate_story``."""

    level: str
    theme: str
    context: str


class QuizRequest(BaseModel):
    """Request body for ``/generate_quiz``."""

    story: str
    level: str


def _message_text(response):
    """Return the stripped text of the first choice of a chat completion."""
    content = response["choices"][0]["message"]["content"]
    # Guard against a null content so .strip() cannot raise.
    return (content or "").strip()


def _fallback_quiz():
    """Return a fresh copy of the generic question used when no quiz is usable."""
    return [
        {
            "question": "කතාව කියවා අවසන් ද?",
            "options": ["ඔව්", "නැහැ", "මතක නැහැ"],
            "correct_answer": 0,
        }
    ]


def _parse_quiz_json(decoded):
    """Extract the list of raw question objects from the model's text output.

    Raises:
        ValueError: if no valid JSON is found (``json.JSONDecodeError`` is a
            subclass) or the parsed value is neither a list nor an object.
    """
    match = _JSON_ARRAY_RE.search(decoded)
    parsed = json.loads(match.group() if match else decoded)
    if isinstance(parsed, dict):
        # A single bare question object: treat it as a one-element quiz.
        return [parsed]
    if not isinstance(parsed, list):
        raise ValueError(f"expected a JSON array, got {type(parsed).__name__}")
    return parsed


def _build_mcq(raw_quizzes, target_opt_count):
    """Normalise raw question objects into the response shape.

    Each returned item has exactly ``target_opt_count`` shuffled options that
    include the answer, plus the answer's index as ``correct_answer``.
    Items that are not objects or that carry no answer are skipped, because
    there is no way to mark a correct option for them.
    """
    final_mcq = []
    for item in raw_quizzes:
        if not isinstance(item, dict):
            continue
        answer = item.get("answer")
        if answer is None or answer == "":
            continue

        raw_opts = item.get("options")
        # Copy so the parsed data is not mutated; tolerate a non-list value.
        opts = list(raw_opts[:target_opt_count]) if isinstance(raw_opts, list) else []
        opts.extend([_FILLER_OPTION] * (target_opt_count - len(opts)))

        # Truncation (or a sloppy model) may have dropped the answer.
        if answer not in opts:
            opts[0] = answer
        random.shuffle(opts)

        final_mcq.append({
            "question": item.get("question", ""),
            "options": opts,
            "correct_answer": opts.index(answer),
        })
    return final_mcq


@app.post("/generate_story")
async def api_generate_story(req: StoryRequest):
    """Generate a Sinhala story for the requested level, theme and context.

    Returns:
        ``{"story": <text>, "time": <elapsed seconds, 2 decimals>}``.
    """
    start_time = time.time()
    safe_level = (req.level or "").strip()

    if safe_level == _SIMPLE_LEVEL:
        instruction = (
            "You are an expert primary school teacher in Sri Lanka. "
            "Write a simple Spoken Sinhala story for Grade 1-2 children. "
            "Exactly 6 sentences."
        )
        temp = 0.3
    else:
        instruction = (
            "You are an expert primary school teacher in Sri Lanka. "
            "Write a formal Written Sinhala story for Grade 3-5 children. "
            "Exactly 7 sentences."
        )
        temp = 0.4

    prompt = f"මට්ටම: {safe_level} | තේමාව: {req.theme} | සන්දර්භය: {req.context}"

    # NOTE(review): this is a synchronous call inside an ``async def`` handler,
    # so the event loop is blocked while the model generates. Confirm whether
    # the Llama instance may be shared across threads before offloading it.
    response = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": "You are a helpful assistant that only speaks Sinhala."},
            {"role": "user", "content": f"{instruction}\n\n{prompt}"},
        ],
        temperature=temp,
        max_tokens=1000,
    )

    story_output = _message_text(response)
    return {"story": story_output, "time": round(time.time() - start_time, 2)}


@app.post("/generate_quiz")
async def api_generate_quiz(req: QuizRequest):
    """Generate multiple-choice questions about ``req.story``.

    The simple level asks for 1 question with 3 options; any other level asks
    for 2 questions with 4 options.

    Returns:
        A list of ``{"question", "options", "correct_answer"}`` dicts. If the
        model output cannot be parsed, or yields no usable question, a single
        generic fallback question is returned instead.
    """
    safe_level = (req.level or "").strip()
    is_simple = safe_level == _SIMPLE_LEVEL
    target_opt_count = 3 if is_simple else 4
    question_spec = "1 question" if is_simple else "2 questions"

    instruction = (
        f"Generate exactly {question_spec} in Sinhala based on the story. "
        "Output strictly as a JSON array."
    )

    # NOTE(review): synchronous model call; see the note in api_generate_story.
    response = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": "Output strictly valid JSON."},
            {"role": "user", "content": f"{instruction}\n\nStory: {req.story}"},
        ],
        temperature=0.1,
    )

    decoded = _message_text(response)

    try:
        final_mcq = _build_mcq(_parse_quiz_json(decoded), target_opt_count)
    except (ValueError, TypeError) as exc:
        # Best-effort endpoint: malformed model output degrades to the
        # fallback question, but the cause is logged instead of swallowed.
        logger.warning("Could not parse quiz from model output: %s", exc)
        return _fallback_quiz()

    # An empty quiz is useless to the client; serve the fallback instead.
    return final_mcq or _fallback_quiz()