# Hugging Face Space: AI Study Material Generator
| from docx import Document | |
| import pytesseract | |
| from PIL import Image | |
| import fitz | |
| import gradio as gr | |
| import threading | |
| import pathlib | |
| import os | |
# --------------------------------------------------
# TOKEN RESOLUTION
# --------------------------------------------------
def resolve_token(ui_token):
    """Return the Hugging Face token to use for model downloads.

    Precedence: a non-blank token typed in the UI wins, then the
    ``hgface_tok`` environment variable, then the empty string
    (anonymous access).

    Args:
        ui_token: Token string from the UI textbox; may be None or blank
            (Gradio can pass None when the field is untouched).

    Returns:
        The stripped token, or "" when none is available.
    """
    # Bug fix: guard against None before calling .strip() — the original
    # raised AttributeError when the UI passed None.
    if ui_token and ui_token.strip():
        return ui_token.strip()
    env_token = os.getenv("hgface_tok")
    if env_token and env_token.strip():
        return env_token.strip()
    return ""
# --------------------------------------------------
# FILE TEXT EXTRACTION
# --------------------------------------------------
# File extensions this app advertises support for.
# NOTE(review): extract_text_from_file() below hardcodes its own image
# extension tuple instead of reading this constant — this appears to be
# informational only; keep the two lists in sync if either changes.
SUPPORTED_EXT = (
    ".pdf", ".docx", ".txt", ".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff"
)
def extract_text_from_file(filepath):
    """Extract plain text from an uploaded file (PDF, DOCX, TXT, or image).

    Args:
        filepath: Path string, or a Gradio file wrapper exposing ``.name``.
            Falsy values yield "".

    Returns:
        The extracted text, or a human-readable error string — this function
        never raises; failures are reported in-band so the UI can show them.
    """
    if not filepath:
        return ""
    # Gradio may hand us a tempfile wrapper; unwrap to its filesystem path.
    if hasattr(filepath, "name"):
        filepath = filepath.name
    ext = pathlib.Path(filepath).suffix.lower()
    try:
        if ext == ".pdf":
            # Bug fix: the original never closed the PyMuPDF document,
            # leaking a file handle per upload. The context manager closes it.
            with fitz.open(filepath) as doc:
                return "\n".join(page.get_text() for page in doc)
        elif ext == ".docx":
            doc = Document(filepath)
            return "\n".join(p.text for p in doc.paragraphs)
        elif ext == ".txt":
            with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                return f.read()
        elif ext in (".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff"):
            # OCR failures get their own message so users can distinguish a
            # bad scan from an unreadable file.
            try:
                img = Image.open(filepath)
                return pytesseract.image_to_string(img)
            except Exception as e:
                return "OCR failed: " + str(e)
        else:
            return "Unsupported file type: " + ext
    except Exception as e:
        return "Could not read file: " + str(e)
# --------------------------------------------------
# MODELS
# --------------------------------------------------
# Dropdown label -> Hugging Face model id. Labels embed an approximate
# download size and an intended-use hint shown to the user.
# NOTE(review): several ids (e.g. "Qwen/Qwen3.5-2B", "Qwen/Qwen3.5-9B",
# "mistralai/Mistral-Small-3-Instruct") could not be verified against the
# Hub from this file alone — confirm they exist under these exact names.
# NOTE(review): the GGUF entry is loaded via AutoModelForCausalLM below,
# which normally needs a gguf_file argument — verify it actually loads.
MODELS = {
    "Gemma 3 270M [0.6GB | Lightning-fast Edge]": "google/gemma-3-270m-it",
    "Qwen 3 0.6B GGUF [0.5GB | Classroom Assistant]": "Qwen/Qwen3-0.6B-GGUF",
    "TinyLlama 1.1B [0.5GB]": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "Qwen 3.5 2B [2.4GB | The Student Tutor]": "Qwen/Qwen3.5-2B",
    "Phi-4 Mini [1.8GB | Logical Powerhouse]": "microsoft/Phi-4-mini-instruct",
    "Gemma 3 1B [2.1GB | Stable & Coherent]": "google/gemma-3-1b-it",
    "Qwen 3.5 9B [7.8GB | BEST FOR LESSON PLANS]": "Qwen/Qwen3.5-9B",
    "Llama 3.1 8B [5.2GB | Industry Standard]": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "Mistral Small 3 [7.1GB | Concise & Accurate]": "mistralai/Mistral-Small-3-Instruct",
    "Gemma 3 9B [6.3GB | Creative & Safe]": "google/gemma-3-9b-it",
    "Mistral Small 12B [9.5GB | Perfect VRAM Balance]": "mistralai/Mistral-Nemo-Instruct-2407",
    "Qwen 3.5 27B [18GB | Dense Curriculum Architect]": "Qwen/Qwen3.5-27B",
}
# Dropdown choices, in MODELS declaration order.
ALL_MODEL_NAMES = list(MODELS.keys())
# --------------------------------------------------
# PIPELINE CACHE
# --------------------------------------------------
# Loaded pipelines keyed by model id, guarded by a lock so concurrent
# requests never load the same model twice.
_pipeline_cache = {}
_pipeline_lock = threading.Lock()


def get_pipeline(model_id, hf_token):
    """Return a cached CPU text-generation pipeline for *model_id*.

    Loads tokenizer + model on first use and memoizes the pipeline.

    Args:
        model_id: Hugging Face model id.
        hf_token: Access token; passed to the Hub only when non-empty.

    Returns:
        (pipeline, None) on success, or (None, error_message) when loading
        fails. Failed loads are not cached, so a later call may retry.
    """
    # Imported lazily so the app can start before transformers is needed.
    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

    with _pipeline_lock:
        cached = _pipeline_cache.get(model_id)
        if cached is not None:
            return cached, None
        try:
            load_kwargs = {"trust_remote_code": True}
            if hf_token:
                load_kwargs["token"] = hf_token
            tok = AutoTokenizer.from_pretrained(model_id, **load_kwargs)
            mdl = AutoModelForCausalLM.from_pretrained(
                model_id, device_map="cpu", **load_kwargs
            )
            _pipeline_cache[model_id] = pipeline(
                "text-generation", model=mdl, tokenizer=tok
            )
        except Exception as exc:
            return None, str(exc)
        return _pipeline_cache[model_id], None
# --------------------------------------------------
# INFERENCE
# --------------------------------------------------
SYSTEM_MSG = "You are an expert educational assistant. Use markdown."


def ask_llm(model_label, prompt, hf_token=""):
    """Run one prompt through the selected model and return its answer.

    Args:
        model_label: A key of MODELS (the dropdown label).
        prompt: The task prompt; SYSTEM_MSG is prepended before generation.
        hf_token: Optional HF token from the UI (env fallback applies).

    Returns:
        The generated text, or an in-band "Model load error:"/"Inference
        error:" message — this function never raises for expected failures.
    """
    token = resolve_token(hf_token)
    pipe, load_err = get_pipeline(MODELS[model_label], token)
    if load_err:
        return "Model load error:\n" + load_err
    try:
        full_prompt = SYSTEM_MSG + "\n\n" + prompt
        generations = pipe(
            full_prompt,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
            repetition_penalty=1.15,
            no_repeat_ngram_size=3,
        )
        answer = generations[0]["generated_text"]
        # text-generation pipelines echo the prompt; drop it when present.
        if answer.startswith(full_prompt):
            answer = answer[len(full_prompt):]
        return answer.strip()
    except Exception as exc:
        return "Inference error:\n" + str(exc)
# --------------------------------------------------
# PROMPTS
# --------------------------------------------------
def make_prompts(topic):
    """Build the four generation prompts for *topic*.

    Returns:
        Dict with keys "lesson", "qa", "mcq", "summary", each a complete
        prompt string ending with the topic text.
    """
    topic_suffix = "\n\nTopic:\n" + topic
    lesson_sections = [
        "Learning objectives",
        "Introduction",
        "Concept explanation",
        "Examples",
        "Case study",
        "Classroom activity",
        "Assessment",
    ]
    lesson_prompt = (
        "Create a structured lesson plan for classroom teaching.\n"
        "Include:\n"
        + "\n".join("- " + section for section in lesson_sections)
    )
    return {
        "lesson": lesson_prompt + topic_suffix,
        "qa": "Generate 10 exam questions with answers." + topic_suffix,
        "mcq": "Generate 10 MCQs with 4 options and answers." + topic_suffix,
        "summary": "Summarize the topic in 250-300 words." + topic_suffix,
    }
def generate_content(text, file, model_label, token):
    """Stream the four study-material sections for a topic and/or file.

    Generator wired to btn.click: each yield is a 4-tuple
    (lesson, qa, mcq, summary) matching the four Markdown outputs,
    progressively replacing "Generating..." placeholders.

    Args:
        text: Topic/syllabus text from the textbox.
        file: Optional uploaded file (passed to extract_text_from_file).
        model_label: Selected MODELS key.
        token: HF token from the UI.
    """
    file_text = extract_text_from_file(file) if file else ""
    syllabus = (text + "\n\n" + file_text).strip()
    if not syllabus:
        # Bug fix: must yield exactly 4 values to match the 4 UI outputs —
        # the original yielded a 5-tuple here, which Gradio rejects.
        yield ("Provide topic or file", "", "", "")
        return
    prompts = make_prompts(syllabus)
    WAIT = "Generating..."
    # Bug fix: 4 placeholders, not 5, to match the 4 outputs.
    results = [WAIT, WAIT, WAIT, WAIT]
    yield tuple(results)
    # Generate sections in tab order, re-yielding after each completes.
    for i, key in enumerate(("lesson", "qa", "mcq", "summary")):
        results[i] = ask_llm(model_label, prompts[key], token)
        yield tuple(results)
# --------------------------------------------------
# UI
# --------------------------------------------------
CSS = """
body,.gradio-container{
font-family:Inter,sans-serif!important;
}
"""

# Bug fix: `theme` and `css` are gr.Blocks() constructor arguments, not
# demo.launch() arguments — the original passed them to launch(), where
# they are rejected (or silently ignored, depending on Gradio version).
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="purple"
    ),
    css=CSS,
) as demo:
    gr.Markdown("# 🎓 AI Study Material Generator")
    with gr.Row():
        with gr.Column():
            # Free-text syllabus/topic entry.
            text_input = gr.Textbox(
                placeholder="Paste syllabus or topic",
                lines=6
            )
            file_input = gr.File(
                label="Upload syllabus file"
            )
        with gr.Column():
            model_selector = gr.Dropdown(
                choices=ALL_MODEL_NAMES,
                value=ALL_MODEL_NAMES[0],
                label="Model"
            )
            token_box = gr.Textbox(
                label="HF Token (optional)",
                type="password"
            )
    btn = gr.Button("Generate")
    with gr.Tabs():
        with gr.TabItem("Lesson Plan"):
            lesson = gr.Markdown()
        with gr.TabItem("Q&A"):
            qa = gr.Markdown()
        with gr.TabItem("MCQ"):
            mcq = gr.Markdown()
        with gr.TabItem("Summary"):
            summary = gr.Markdown()
    # generate_content is a generator, so the four tabs stream updates
    # as each section finishes.
    btn.click(
        fn=generate_content,
        inputs=[text_input, file_input, model_selector, token_box],
        outputs=[lesson, qa, mcq, summary]
    )

demo.launch()