Spaces:

TD-jayadeera
/

LLM_test

Build error

File size: 3,945 Bytes

import time
import json
import re
import random
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# ASGI application object; the route handlers below register against it.
app = FastAPI(title="Sinhala Mithuru HF GGUF")

# CORS is left fully open: every origin, HTTP method and request header
# is allowed via the "*" wildcard.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)

# Download the GGUF file from the model repository on the Hub.
# This is module-level code, so it runs only when the Space first starts up
# (i.e. when this module is imported), not on every request.
print("⏳ Downloading model from Hub... This might take a few minutes.")
MODEL_PATH = hf_hub_download(
    repo_id="TD-jayadeera/sinhala-llama3-gguf", 
    filename="llama3-sinhala.Q4_K_M.gguf"
)

# Load the downloaded weights once and share the resulting `llm` object
# between both endpoints. `chat_format="llama-3"` selects the chat template
# used by `create_chat_completion`.
print("🚀 Loading GGUF Model on CPU...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,
    n_threads=4, 
    chat_format="llama-3"
)
print("✅ GGUF Model Ready.")

class StoryRequest(BaseModel):
    """Request body for the ``/generate_story`` endpoint."""

    # Difficulty label; the handler strips it and compares it with "සරල"
    # to choose between the two story instructions.
    level: str
    # Story theme, interpolated verbatim into the user prompt.
    theme: str
    # Additional context, interpolated verbatim into the user prompt.
    context: str

class QuizRequest(BaseModel):
    """Request body for the ``/generate_quiz`` endpoint."""

    # Story text the questions are generated from; sent to the model as-is.
    story: str
    # Difficulty label; the handler strips it and compares it with "සරල"
    # to pick the number of questions and options.
    level: str

@app.post("/generate_story")
async def api_generate_story(req: StoryRequest):
    """Generate a short Sinhala story for the requested level, theme and context.

    The level "සරල" selects the spoken-Sinhala, 6-sentence instruction with a
    temperature of 0.3; any other level selects the written-Sinhala,
    7-sentence instruction with a temperature of 0.4.

    Returns:
        A dict with ``story`` (the stripped model reply) and ``time``
        (elapsed wall-clock seconds, rounded to two decimals).
    """
    started_at = time.time()

    level_label = (req.level or "").strip()

    # Default to the formal variant; override only for the simple level.
    teacher_brief = "You are an expert primary school teacher in Sri Lanka. Write a formal Written Sinhala story for Grade 3-5 children. Exactly 7 sentences."
    sampling_temperature = 0.4
    if level_label == "සරල":
        teacher_brief = "You are an expert primary school teacher in Sri Lanka. Write a simple Spoken Sinhala story for Grade 1-2 children. Exactly 6 sentences."
        sampling_temperature = 0.3

    request_details = f"මට්ටම: {level_label} | තේමාව: {req.theme} | සන්දර්භය: {req.context}"

    system_message = {
        "role": "system",
        "content": "You are a helpful assistant that only speaks Sinhala.",
    }
    user_message = {
        "role": "user",
        "content": f"{teacher_brief}\n\n{request_details}",
    }

    completion = llm.create_chat_completion(
        messages=[system_message, user_message],
        temperature=sampling_temperature,
        max_tokens=1000,
    )

    first_choice = completion["choices"][0]
    story_text = first_choice["message"]["content"].strip()
    elapsed = round(time.time() - started_at, 2)
    return {"story": story_text, "time": elapsed}

def _fallback_quiz():
    """Return a fresh copy of the generic question used when no quiz can be built."""
    return [{"question": "කතාව කියවා අවසන් ද?", "options": ["ඔව්", "නැහැ", "මතක නැහැ"], "correct_answer": 0}]


def _parse_quiz_array(decoded):
    """Parse the model reply and return the list of raw question objects.

    The span from the first ``[`` to the last ``]`` is preferred so that any
    text the model adds around the array is ignored; when the reply contains
    no brackets the whole reply is parsed instead.

    Raises:
        ValueError: the text is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass) or the parsed value is not a list.
    """
    match = re.search(r'\[.*\]', decoded, re.DOTALL)
    parsed = json.loads(match.group() if match else decoded)
    if not isinstance(parsed, list):
        raise ValueError("quiz output is not a JSON array")
    return parsed


def _normalise_question(raw, option_count):
    """Convert one raw question object into the response shape.

    Returns ``None`` when the item cannot yield a gradable question: it is
    not a dict, or it carries no ``"answer"``. Otherwise returns a dict with
    ``question``, ``options`` (exactly ``option_count`` entries, shuffled) and
    ``correct_answer`` (index of the answer inside ``options``).
    """
    if not isinstance(raw, dict):
        return None

    answer = raw.get("answer")
    if answer is None:
        # Without a known answer the original code injected None as an
        # option; such a question cannot be graded, so drop it instead.
        return None

    options = raw.get("options")
    # Copy so the parsed data is not mutated; tolerate a missing/non-list value.
    options = list(options) if isinstance(options, list) else []

    # Pad with the generic "other" choice, then trim to the target size.
    options += ["වෙනත්"] * (option_count - len(options))
    options = options[:option_count]

    # Guarantee the correct answer survives padding/truncation.
    if answer not in options:
        options[0] = answer

    random.shuffle(options)
    return {
        "question": raw.get("question", ""),
        "options": options,
        "correct_answer": options.index(answer),
    }


@app.post("/generate_quiz")
async def api_generate_quiz(req: QuizRequest):
    """Generate multiple-choice questions in Sinhala for the given story.

    The level "සරල" asks the model for 1 question and yields 3 options per
    question; any other level asks for 2 questions with 4 options each.

    Returns:
        A list of dicts with ``question``, ``options`` and ``correct_answer``
        (the index of the right option). If the model reply cannot be parsed
        or contains no usable question, a single generic fallback question is
        returned instead.
    """
    safe_level = (req.level or "").strip()

    target_opt_count = 3 if safe_level == "සරල" else 4
    instruction = f"Generate exactly {'1 question' if safe_level == 'සරල' else '2 questions'} in Sinhala based on the story. Output strictly as a JSON array."

    # NOTE(review): this is a synchronous call inside an async handler;
    # presumably it occupies the event loop for the whole inference. Confirm
    # before offloading it, since the shared `llm` object may not tolerate
    # concurrent calls.
    response = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": "Output strictly valid JSON."},
            {"role": "user", "content": f"{instruction}\n\nStory: {req.story}"}
        ],
        temperature=0.1
    )

    # Treat a missing/None content as an empty reply rather than crashing.
    decoded = (response["choices"][0]["message"]["content"] or "").strip()

    try:
        raw_quizzes = _parse_quiz_array(decoded)
    except ValueError as exc:
        # Only parsing problems are expected here; anything else should surface.
        print(f"⚠️ Quiz parsing failed ({exc}); returning fallback question.")
        return _fallback_quiz()

    final_mcq = []
    for raw in raw_quizzes:
        question = _normalise_question(raw, target_opt_count)
        # Skip malformed items individually instead of discarding the whole quiz.
        if question is not None:
            final_mcq.append(question)

    # An empty result would leave the client with nothing to show.
    return final_mcq or _fallback_quiz()