Spaces:

TD-jayadeera
/

LLM_test

Build error

App Files Files Community

LLM_test / app.py

TD-jayadeera

Update app.py

5976c7c verified 2 months ago

raw

history blame contribute delete

3.95 kB

	import time
	import json
	import re
	import random
	from fastapi import FastAPI
	from fastapi.middleware.cors import CORSMiddleware
	from pydantic import BaseModel
	from huggingface_hub import hf_hub_download
	from llama_cpp import Llama

	# Application object; the @app.post route decorators in this file register
	# their endpoints on it.
	app = FastAPI(title="Sinhala Mithuru HF GGUF")

	# CORS is fully open: every origin, HTTP method and request header is allowed.
	# NOTE(review): presumably intended so a browser front end hosted elsewhere
	# can call this API - confirm before tightening.
	app.add_middleware(
	CORSMiddleware,
	allow_origins=["*"],
	allow_methods=["*"],
	allow_headers=["*"],
	)

	# Download the GGUF model file from the Hub model repository.
	# This happens only when the Space is first started up.
	print("⏳ Downloading model from Hub... This might take a few minutes.")
	# The value returned here is passed straight to Llama(model_path=...) below.
	MODEL_PATH = hf_hub_download(
	repo_id="TD-jayadeera/sinhala-llama3-gguf",
	filename="llama3-sinhala.Q4_K_M.gguf"
	)

	# The model is loaded once at import time; both request handlers in this
	# file call this single module-level `llm` instance.
	print("🚀 Loading GGUF Model on CPU...")
	llm = Llama(
	model_path=MODEL_PATH,
	n_ctx=4096,  # context size requested from llama.cpp
	n_threads=4,  # thread count passed to llama.cpp
	chat_format="llama-3"  # chat template used by create_chat_completion
	)
	print("✅ GGUF Model Ready.")

	class StoryRequest(BaseModel):
	    """JSON body accepted by the ``/generate_story`` endpoint."""

	    # Difficulty label; the story handler compares it against "සරල" to pick
	    # between its two instruction/temperature presets.
	    level: str
	    # Story theme, interpolated verbatim into the user prompt.
	    theme: str
	    # Extra context, interpolated verbatim into the user prompt.
	    context: str

	class QuizRequest(BaseModel):
	    """JSON body accepted by the ``/generate_quiz`` endpoint."""

	    # Story text the questions are generated from; sent to the model as-is.
	    story: str
	    # Difficulty label; "සරල" selects 1 question with 3 options, anything
	    # else selects 2 questions with 4 options.
	    level: str

	@app.post("/generate_story")
	async def api_generate_story(req: StoryRequest):
	    """Generate a short Sinhala children's story with the loaded GGUF model.

	    The level "සරල" selects the Grade 1-2 spoken-Sinhala instruction
	    (6 sentences, temperature 0.3); any other level selects the Grade 3-5
	    written-Sinhala instruction (7 sentences, temperature 0.4).

	    Args:
	        req: Request body carrying ``level``, ``theme`` and ``context``.

	    Returns:
	        A dict with ``"story"`` (the stripped model reply) and ``"time"``
	        (elapsed wall-clock seconds, rounded to two decimals).
	    """
	    start_time = time.time()

	    safe_level = (req.level or "").strip()
	    if safe_level == "සරල":
	        instruction = "You are an expert primary school teacher in Sri Lanka. Write a simple Spoken Sinhala story for Grade 1-2 children. Exactly 6 sentences."
	        temp = 0.3
	    else:
	        instruction = "You are an expert primary school teacher in Sri Lanka. Write a formal Written Sinhala story for Grade 3-5 children. Exactly 7 sentences."
	        temp = 0.4

	    # Plain "|" separators. The earlier "\|" was an invalid escape sequence
	    # (SyntaxWarning on Python 3.12+) and leaked a literal backslash into
	    # the prompt sent to the model.
	    prompt = f"මට්ටම: {safe_level} | තේමාව: {req.theme} | සන්දර්භය: {req.context}"

	    response = llm.create_chat_completion(
	        messages=[
	            {"role": "system", "content": "You are a helpful assistant that only speaks Sinhala."},
	            {"role": "user", "content": f"{instruction}\n\n{prompt}"},
	        ],
	        temperature=temp,
	        max_tokens=1000,
	    )

	    story_output = response["choices"][0]["message"]["content"].strip()
	    return {"story": story_output, "time": round(time.time() - start_time, 2)}

	def _fallback_quiz():
	    """Return a fresh copy of the fixed one-question quiz used when the model reply is unusable."""
	    return [{"question": "කතාව කියවා අවසන් ද?", "options": ["ඔව්", "නැහැ", "මතක නැහැ"], "correct_answer": 0}]


	def _parse_quiz_items(decoded):
	    """Extract the list of raw question objects from the model's text reply.

	    Raises:
	        ValueError: If the reply is not valid JSON (``json.JSONDecodeError``
	            is a ``ValueError``) or does not decode to a JSON array.
	    """
	    # The reply may carry text around the array, so try the widest [...]
	    # span first and fall back to parsing the whole reply.
	    match = re.search(r'\[.*\]', decoded, re.DOTALL)
	    parsed = json.loads(match.group() if match else decoded)
	    if not isinstance(parsed, list):
	        raise ValueError("quiz payload is not a JSON array")
	    return parsed


	def _build_mcq(item, option_count):
	    """Convert one raw question object into the response shape, or None if it is unusable."""
	    if not isinstance(item, dict):
	        return None
	    answer = item.get("answer")
	    if answer is None:
	        # Without an answer the question cannot be graded; previously this
	        # case put None into the options list.
	        return None

	    raw_options = item.get("options")
	    # Work on a copy so padding and shuffling never touch the parsed payload.
	    options = list(raw_options[:option_count]) if isinstance(raw_options, list) else []
	    options.extend(["වෙනත්"] * (option_count - len(options)))
	    if answer not in options:
	        # Guarantee the correct answer is always among the choices.
	        options[0] = answer
	    random.shuffle(options)
	    return {
	        "question": item.get("question", ""),
	        "options": options,
	        "correct_answer": options.index(answer),
	    }


	@app.post("/generate_quiz")
	async def api_generate_quiz(req: QuizRequest):
	    """Generate multiple-choice questions about a story.

	    The level "සරල" asks the model for 1 question and yields 3 options per
	    question; any other level asks for 2 questions with 4 options each.

	    Args:
	        req: Request body carrying the ``story`` text and the ``level``.

	    Returns:
	        A list of dicts with ``"question"``, ``"options"`` (shuffled, padded
	        or truncated to the target count) and ``"correct_answer"`` (index of
	        the answer within ``options``). A fixed fallback question is
	        returned when the reply cannot be parsed or holds no usable question.
	    """
	    safe_level = (req.level or "").strip()
	    is_simple = safe_level == "සරල"

	    target_opt_count = 3 if is_simple else 4
	    instruction = f"Generate exactly {'1 question' if is_simple else '2 questions'} in Sinhala based on the story. Output strictly as a JSON array."

	    response = llm.create_chat_completion(
	        messages=[
	            {"role": "system", "content": "Output strictly valid JSON."},
	            {"role": "user", "content": f"{instruction}\n\nStory: {req.story}"},
	        ],
	        temperature=0.1,
	    )

	    decoded = response["choices"][0]["message"]["content"].strip()

	    try:
	        raw_quizzes = _parse_quiz_items(decoded)
	    except (ValueError, RecursionError):
	        # Only parse failures fall back. RecursionError covers degenerate,
	        # deeply nested replies. Cancellation and interpreter-exit signals
	        # are no longer swallowed as they were by the bare except.
	        return _fallback_quiz()

	    candidates = (_build_mcq(q, target_opt_count) for q in raw_quizzes)
	    final_mcq = [mcq for mcq in candidates if mcq is not None]
	    # An empty quiz is useless to the client, so serve the fallback instead.
	    return final_mcq or _fallback_quiz()