# import gradio as gr
# from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
# from threading import Thread
# from qwen_vl_utils import process_vision_info
# import torch
# import time
#
# # Check if a GPU is available
# device = "cuda" if torch.cuda.is_available() else "cpu"
# local_path = "Fancy-MLLM/R1-OneVision-7B"
#
# # Load the model on the appropriate device (GPU if available, otherwise CPU)
# model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
#     local_path, torch_dtype="auto", device_map=device
# )
# processor = AutoProcessor.from_pretrained(local_path)
#
# def generate_output(image, text):
#     # Prepare input data
#     messages = [
#         {
#             "role": "user",
#             "content": [
#                 {"type": "image", "image": image, "min_pixels": 1003520, "max_pixels": 12845056},
#                 {"type": "text", "text": text},
#             ],
#         }
#     ]
#     # Prepare inputs for the model
#     text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
#     image_inputs, video_inputs = process_vision_info(messages)
#     inputs = processor(
#         text=[text_input],
#         images=image_inputs,
#         videos=video_inputs,
#         padding=True,
#         return_tensors="pt",
#     )
#     # Move inputs to the same device as the model
#     inputs = inputs.to(model.device)
#     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
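#     # Near-greedy decoding: top_k=1 with temperature close to 0 makes generation effectively deterministic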
#     generation_kwargs = dict(
#         **inputs,
#         streamer=streamer,
#         max_new_tokens=4096,
#         top_p=0.001,
#         top_k=1,
#         temperature=0.01,
#         repetition_penalty=1.0,
#     )
#     thread = Thread(target=model.generate, kwargs=generation_kwargs)
#     thread.start()
#     generated_text = ""
#     try:
#         for new_text in streamer:
#             generated_text += new_text
| # yield f"β{generated_text}" | |
#     except Exception as e:
#         print(f"Error: {e}")
#         yield f"Error occurred: {str(e)}"
#
# css = """
# #output-markdown {
#     overflow-y: auto;
#     white-space: pre-wrap;
#     word-wrap: break-word;
# }
# #output-markdown .math {
#     overflow-x: auto;
#     max-width: 100%;
# }
# .markdown-text {
#     white-space: pre-wrap;
#     word-wrap: break-word;
# }
# .markdown-output {
#     min-height: 20vh;
#     max-width: 100%;
#     overflow-y: auto;
# }
# #qwen-md .katex-display { display: inline; }
# #qwen-md .katex-display > .katex { display: inline; }
# #qwen-md .katex-display > .katex > .katex-html { display: inline; }
# """
#
# with gr.Blocks(css=css) as demo:
#     gr.HTML("""<center><font size=8>R1-OneVision Demo</center>""")
#     with gr.Row():
#         with gr.Column():
#             input_image = gr.Image(type="pil", label="Upload")  # handle the upload as a PIL image
#             input_text = gr.Textbox(label="Input your question")
#             with gr.Row():
#                 clear_btn = gr.ClearButton([input_image, input_text])
#                 submit_btn = gr.Button("Submit", variant="primary")
#         with gr.Column():
#             output_text = gr.Markdown(elem_id="qwen-md", container=True, elem_classes="markdown-output")
#     submit_btn.click(fn=generate_output, inputs=[input_image, input_text], outputs=output_text)
#
# demo.launch(share=False)
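
# -----------------------------------------------------------------------------
# Active app: a gr.ChatInterface version of the same demo. The commented-out
# gr.Blocks implementation above is kept only for reference.
# -----------------------------------------------------------------------------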
import gradio as gr
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, TextIteratorStreamer
from transformers.image_utils import load_image
from threading import Thread
import time
import torch
import spaces

MODEL_ID = "Fancy-MLLM/R1-OneVision-7B"
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
).to("cuda").eval()

# `import spaces` implies a ZeroGPU Space, where the inference function needs the
# @spaces.GPU decorator to be allocated a GPU for the duration of each call.
@spaces.GPU
def model_inference(input_dict, history):
    text = input_dict["text"]
    files = input_dict["files"]

    # Load images if provided (an empty file list simply yields an empty image list)
    images = [load_image(image) for image in files]
    # Validate input: a text query is required
    if text == "" and not images:
        raise gr.Error("Please input a query and optionally image(s).")
    if text == "" and images:
        raise gr.Error("Please input a text query along with the image(s).")
    # Prepare messages for the model
    messages = [
        {
            "role": "user",
            "content": [
                *[{"type": "image", "image": image} for image in images],
                {"type": "text", "text": text},
            ],
        }
    ]

    # Apply chat template and process inputs
    prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(
        text=[prompt],
        images=images if images else None,
        return_tensors="pt",
        padding=True,
    ).to("cuda")

    # Set up streamer for real-time output
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
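    # (TextIteratorStreamer expects a tokenizer; Qwen2.5-VL's processor proxies
    # decode()/batch_decode() to its tokenizer, so passing the processor works too)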
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
    # Start generation in a separate thread
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Stream the output
    buffer = ""
    yield "Thinking..."
    for new_text in streamer:
        buffer += new_text
        time.sleep(0.01)
        yield buffer
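
# A minimal sketch of how the streaming generator can be exercised outside Gradio
# (hypothetical local test; assumes "5.jpg" from the example below exists locally):
#
#     for partial in model_inference({"text": "Describe this image.", "files": ["5.jpg"]}, history=[]):
#         print(partial)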

examples = [
    [{"text": "Hint: Please answer the question and provide the final answer at the end. Question: Which number do you have to write in the last daisy?", "files": ["5.jpg"]}]
]
demo = gr.ChatInterface(
    fn=model_inference,
    description="# **Fancy-MLLM/R1-OneVision-7B**",
    examples=examples,
    textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
    stop_btn="Stop Generation",
    multimodal=True,
    cache_examples=False,
)

demo.launch(debug=True)
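
# Assumed Space dependencies (a guess at requirements.txt based on the imports above):
#   gradio
#   spaces
#   torch
#   transformers
#   qwen-vl-utils  (only if the legacy gr.Blocks demo above is re-enabled)
#   accelerate     (only for device_map in the legacy demo)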