"""Gradio demo that summarizes pasted text into bullet points with IBM Granite."""

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"

# Instruction sent as the system turn of every summarization request.
SYSTEM_PROMPT = (
    "You are an expert assistant. Summarize the given text into clear, "
    "concise bullet points."
)

# Upper bound on the number of tokens generated for a summary.
MAX_NEW_TOKENS = 200

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    # Half precision only when CUDA is available; otherwise full precision.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)


def summarize(text: str) -> str:
    """Summarize *text* into bullet points using the loaded chat model.

    Args:
        text: The raw text to summarize.

    Returns:
        The decoded model reply with special tokens removed, or an empty
        string when *text* is empty or contains only whitespace.
    """
    # Nothing to summarize: skip the generation pass instead of letting the
    # model produce text for an empty prompt.
    if not text or not text.strip():
        return ""

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": text},
    ]

    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    # Sampling is disabled, so no temperature is passed: it would have no
    # effect on the output and only triggers a warning from the library.
    outputs = model.generate(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=False,
    )

    # The returned sequence starts with the prompt tokens; keep only what
    # was generated after them.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True,
    )


demo = gr.Interface(
    fn=summarize,
    inputs=gr.Textbox(
        lines=10,
        placeholder="Paste text to summarize...",
        label="Input Text",
    ),
    outputs=gr.Textbox(
        lines=8,
        label="Summary (Bullet Points)",
    ),
    title="Text Summarizer (Point-wise)",
    description="Summarizes input text into clear bullet points using IBM Granite 3.3-2B Instruct.",
)

if __name__ == "__main__":
    demo.launch()