Update app.py
app.py CHANGED

@@ -39,33 +39,59 @@ You are a strict flashcard generator.
 - Always follow the requested format exactly.
 <</SYS>>"""
 
-#
-
+# Load model + tokenizer
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.float16,
+    device_map="auto",
+    load_in_4bit=True  # helps fit on ZeroGPU
+)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+def generate_flashcards(user_input, max_new_tokens=600, temperature=0.5):
+    # Format the prompt with system + user input
     prompt = (
         f"<s>[INST] {SYSTEM_MESSAGE}\n\n"
-        f"Create
+        f"Create a variety of study aids with 10 items each, strictly using only the information provided.\n\n"
         f"Input: {user_input}[/INST]\nOutput:"
     )
-    output = pipe(
-        prompt,
-        max_new_tokens=max_new_tokens,
-        temperature=temperature,
-        repetition_penalty=1.05,
-        do_sample=True
-    )
-    return output[0]["generated_text"]
 
-
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=max_new_tokens,
+            temperature=temperature,
+            do_sample=False,
+            repetition_penalty=1.05,
+            pad_token_id=tokenizer.eos_token_id,
+            eos_token_id=tokenizer.eos_token_id,
+        )
+
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Extract only the Output section
+    if "Output:" in response:
+        final_answer = response.split("Output:")[-1].strip()
+    else:
+        final_answer = response.strip()
+
+    return final_answer
+
+
+# Gradio UI
 demo = gr.Interface(
-    fn=
+    fn=generate_flashcards,
     inputs=[
-        gr.Textbox(label="
-        gr.Slider(
-        gr.Slider(0.1, 1.
+        gr.Textbox(label="Enter study text", lines=8, placeholder="Paste your study material here..."),
+        gr.Slider(100, 1000, value=600, step=50, label="Max New Tokens"),
+        gr.Slider(0.1, 1.0, value=0.5, step=0.1, label="Temperature"),
     ],
     outputs="text",
     title="Flashcard Generator (Mistral-7B LoRA)",
-    description="
+    description="Paste study material and generate flashcards. Model strictly extracts only from input."
 )
 
-
+if __name__ == "__main__":
+    demo.launch()
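
For a quick local sanity check of the new generation path, something along these lines could be run next to app.py (a minimal sketch, not part of this commit: it assumes app.py defines model_id, SYSTEM_MESSAGE, and the torch/transformers imports implied by the diff context, and the sample study text below is made up):

# smoke_test.py -- hypothetical helper, not part of this commit.
# Importing app loads the 4-bit model and builds the Gradio demo,
# but the __main__ guard keeps demo.launch() from running.
from app import generate_flashcards

sample_text = (
    "The mitochondrion is the site of cellular respiration. "
    "ATP is produced during oxidative phosphorylation."
)

# do_sample=False in the new code means greedy decoding, so the
# temperature argument has no practical effect on this call.
cards = generate_flashcards(sample_text, max_new_tokens=300, temperature=0.5)
print(cards)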