Hugging Face Space app source (Space status at time of capture: Runtime error).
| import os | |
| import gradio as gr | |
| from llama_cpp import Llama | |
# -------- Model config --------
# Both values can be overridden via env vars to swap in a different GGUF
# model without code changes; defaults target a small q4_k_m quant that is
# a reasonable quality/speed balance on CPU-only Spaces hardware.
REPO_ID = os.getenv("GGUF_REPO_ID", "tiiuae/Falcon3-1B-Instruct-GGUF")
FILENAME = os.getenv("GGUF_FILENAME", "Falcon3-1B-Instruct-q4_k_m.gguf")

# Lazy singleton so the model loads only once per Space runtime.
_LLM = None
def get_llm():
    """Return the process-wide Llama instance, loading it on first use.

    llama-cpp-python can download GGUF weights straight from the Hugging
    Face Hub via ``Llama.from_pretrained``, so no manual fetch is needed.
    """
    global _LLM
    if _LLM is not None:
        return _LLM
    _LLM = Llama.from_pretrained(
        repo_id=REPO_ID,
        filename=FILENAME,
        verbose=False,
        # Settings sized for CPU-only Spaces hardware.
        n_ctx=4096,
        n_threads=int(os.getenv("OMP_NUM_THREADS", "4")),
    )
    return _LLM
def build_prompt(topic: str, audience: str, num_slides: int, tone: str, time_minutes: int):
    """Assemble an instruct-style prompt asking for a slide-by-slide script.

    Plain-text instruction formatting works broadly across GGUF instruct
    models, so no model-specific chat template is applied here.
    """
    parts = [
        "You are a senior presentation writer and speaking coach.",
        "Task: Write a PowerPoint script for the topic below.",
        f"Topic: {topic}",
        f"Audience: {audience}",
        f"Tone: {tone}",
        f"Total time: {time_minutes} minutes",
        f"Number of slides: {num_slides}",
        "Requirements:",
        f"- Output EXACTLY {num_slides} slides.",
        "- For each slide include:",
        "1) Slide Title",
        "2) 3–6 bullet points (concise, slide-friendly)",
        "3) Speaker Notes (what to say, 80–140 words)",
        "- Include a strong opening hook and a clear closing with call-to-action.",
        "- Avoid fluff. Use concrete examples where possible.",
        "- Format strictly like:",
        "SLIDE 1: <title>",
        "Bullets:",
        "- ...",
        "- ...",
        "Speaker Notes:",
        "...",
        "SLIDE 2: ...",
    ]
    return "\n".join(parts)
def generate_ppt_script(topic, audience, num_slides, tone, time_minutes, temperature, max_tokens):
    """Validate inputs, run the model, and return the slide script as text.

    Returns a user-facing message (never raises) on bad input so the
    Gradio output textbox always shows something useful.
    """
    # Gradio can deliver None (cleared textbox) as well as "" — guard both.
    # The original guarded `topic` but called `audience.strip()` unguarded,
    # which raised AttributeError when the audience box was cleared.
    if not topic or not topic.strip():
        return "Please enter a topic."
    audience = (audience or "").strip()

    llm = get_llm()
    prompt = build_prompt(topic.strip(), audience, int(num_slides), tone, int(time_minutes))

    # Generate
    out = llm(
        prompt,
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=0.95,
        stop=["</s>", "SLIDE 999:"],  # simple safety stop
    )
    text = out["choices"][0]["text"].strip()
    # Small models occasionally emit only a stop token; surface that rather
    # than returning an empty box.
    return text or "The model returned no text — try again or increase max tokens."
# ---- UI wiring: inputs feed generate_ppt_script via the button click ----
with gr.Blocks(title="Falcon3 PPT Script Writer (GGUF)") as demo:
    gr.Markdown(
        """
# Falcon3-1B-Instruct (GGUF) — PPT Script Writer
Enter a topic and get a **slide-by-slide deck script** with **speaker notes**.
"""
    )
    with gr.Row():
        topic = gr.Textbox(label="Topic", placeholder="e.g., Agentic AI in SRE: reducing incident MTTR", lines=2)
        audience = gr.Textbox(label="Audience", placeholder="e.g., SRE + platform engineering leaders", lines=2)
    with gr.Row():
        num_slides = gr.Slider(5, 20, value=10, step=1, label="Number of slides")
        time_minutes = gr.Slider(5, 60, value=15, step=1, label="Total talk time (minutes)")
    tone = gr.Dropdown(
        ["Professional", "Conversational", "Persuasive", "Technical Deep Dive", "Executive Summary"],
        value="Professional",
        label="Tone",
    )
    # Sampling controls hidden by default — sensible values for most users.
    with gr.Accordion("Generation settings", open=False):
        temperature = gr.Slider(0.0, 1.2, value=0.6, step=0.05, label="Temperature")
        max_tokens = gr.Slider(256, 2048, value=1200, step=64, label="Max output tokens")
    run_btn = gr.Button("Generate PPT Script")
    output = gr.Textbox(label="PPT Script Output", lines=28)
    run_btn.click(
        fn=generate_ppt_script,
        inputs=[topic, audience, num_slides, tone, time_minutes, temperature, max_tokens],
        outputs=output,
    )
# One request at a time: generation is CPU-heavy, so serialize via the queue.
demo.queue(default_concurrency_limit=1).launch()