"""Disaster Precaution Chatbot: Qwen2-VL vision model behind a Gradio UI.

Upload an image of a natural disaster (or its aftermath) and ask for
precautionary / preparedness measures. Not a substitute for official
emergency guidance.
"""

import os
from typing import List, Tuple

import torch
import gradio as gr
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq

# ---------- Config ----------
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2-VL-2B-Instruct")  # Smaller CPU-friendly model
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "512"))

DISCLAIMER = (
    "⚠️ **Disclaimer:** This tool provides general information and is **not** a substitute for "
    "official emergency guidance. In an emergency, follow directions from local authorities."
)

# ---------- Load model ----------
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

print(f"Loading model: {MODEL_NAME} on {device} ({dtype})")
processor = AutoProcessor.from_pretrained(MODEL_NAME)
model = AutoModelForVision2Seq.from_pretrained(MODEL_NAME, torch_dtype=dtype).to(device)
model.eval()  # inference only; disables dropout etc.

# ---------- Core logic ----------
DEFAULT_QUESTION = (
    "Identify the type of natural disaster in this image and give immediate and long-term "
    "precautionary / preparedness measures, with a short disclaimer."
)


def analyze(image: Image.Image, question: str, history: List[Tuple[str, str]]):
    """Run the VLM on (image, question) and append the exchange to *history*.

    Returns ``(updated_history, textbox_value)``: the textbox is cleared on
    success, or carries an error message when no image was provided.
    """
    if image is None:
        return history, "❌ Please upload an image first."
    # BUGFIX: the textbox value can be None; the old `question.strip()` would
    # raise AttributeError in that case.
    question = (question or "").strip() or DEFAULT_QUESTION

    # BUGFIX: Qwen2-VL requires its chat template so the prompt contains the
    # image placeholder tokens; passing raw text straight to the processor
    # mis-aligns (or rejects) the image features.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": question},
            ],
        }
    ]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(images=[image], text=[prompt], return_tensors="pt").to(device)

    with torch.inference_mode():  # no autograd bookkeeping during generation
        output_ids = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)

    # BUGFIX: generate() returns prompt + completion; slice off the prompt so
    # the chat does not echo the question/template back to the user.
    generated = output_ids[:, inputs["input_ids"].shape[1]:]
    answer = processor.batch_decode(generated, skip_special_tokens=True)[0].strip()

    # In-place append is intentional: `history` is the same list object held
    # by gr.State, so state and chatbot stay in sync.
    history.append((question, answer + "\n\n" + DISCLAIMER))
    return history, ""


def clear():
    """Reset the chat: empty history (state + display) and empty textbox."""
    # BUGFIX: the old clear() reset only the chatbot display and textbox; the
    # gr.State history kept growing and reappeared on the next analyze. We now
    # return a fresh list for the state output as well.
    return [], [], ""


# ---------- Gradio UI ----------
with gr.Blocks(title="Disaster Precaution Chatbot (Qwen2-VL + Gradio)") as demo:
    gr.Markdown("# 🌪️ Disaster Precaution Chatbot")
    gr.Markdown(
        "Upload an image that shows a natural disaster (or its aftermath) and ask for "
        "precautionary / preparedness measures."
    )
    gr.Markdown(DISCLAIMER)

    state = gr.State([])

    with gr.Row():
        with gr.Column(scale=1):
            img_input = gr.Image(type="pil", label="Upload disaster image")
            txt_input = gr.Textbox(
                label="Your question (optional)",
                placeholder="e.g., What should I do to prepare for this?",
            )
            analyze_btn = gr.Button("Analyze Image", variant="primary")
            clear_btn = gr.Button("Clear Chat", variant="secondary")
        with gr.Column(scale=1):
            chatbot = gr.Chatbot(label="Chatbot", height=480, show_copy_button=True)

    analyze_btn.click(
        fn=analyze,
        inputs=[img_input, txt_input, state],
        outputs=[chatbot, txt_input],
    )
    # `state` added to outputs so the hidden history is actually reset.
    clear_btn.click(fn=clear, inputs=[], outputs=[chatbot, state, txt_input])

if __name__ == "__main__":
    demo.launch()