import gradio as gr
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, TextIteratorStreamer
from qwen_vl_utils import process_vision_info
from threading import Thread
import spaces
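
# Note: the Space also needs a requirements.txt next to this app.py. A minimal,
# unpinned sketch (the exact package versions are an assumption) would list:
#   gradio
#   spaces
#   torch
#   transformers
#   accelerate
#   qwen-vl-utils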

file_path = "csfufu/Revisual-R1-final"

# The processor handles both text tokenization and image preprocessing;
# min/max_pixels bound the image resolution fed to the vision encoder.
processor = AutoProcessor.from_pretrained(
    file_path,
    min_pixels=256 * 28 * 28,
    max_pixels=1280 * 28 * 28,
)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    file_path, torch_dtype="auto", device_map="auto"
)

# On a ZeroGPU Space the handler must be decorated so it is given a GPU slot
# while generating (assumes this Space runs on ZeroGPU hardware; `spaces` is
# otherwise imported but unused).
@spaces.GPU
def respond(
    input_dict,
    chat_history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    text = input_dict["text"]
    files = input_dict["files"]

    # Rebuild the conversation in the Qwen chat-template message format.
    messages = [{
        "role": "system",
        "content": system_message
    }]
    print(chat_history)
    for message in chat_history:
        if isinstance(message["content"], str):
            messages.append({
                "role": message["role"],
                "content": [
                    {"type": "text", "text": message["content"]},
                ]
            })
        else:
            # Non-string history entries hold the previously uploaded image files.
            messages.append({
                "role": message["role"],
                "content": [
                    {"type": "image", "image": image}
                    for image in message["content"]
                ]
            })
    messages.append(
        {
            "role": "user",
            "content": [
                {"type": "text", "text": text},
                *[{"type": "image", "image": image} for image in files]
            ]
        }
    )

    # Collect the image/video inputs referenced by the messages and build model inputs.
    image_inputs, video_inputs = process_vision_info(messages)
    prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(
        text=[prompt],
        images=image_inputs,
        videos=video_inputs,
        return_tensors="pt",
        padding=True,
    ).to(model.device)

    # Generate on a background thread and stream partial text back to the UI.
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        do_sample=True,  # enable sampling so temperature/top_p actually take effect
        temperature=temperature,
        top_p=top_p,
    )
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        print(new_text, end='')
        yield buffer
    print()

demo = gr.ChatInterface(
    title='Revisual-R1',
    type='messages',
    chatbot=gr.Chatbot(
        type='messages',
        # allow_tags=['think'],
        sanitize_html=False,
        scale=1,
    ),
    fn=respond,
    examples=[[{
        "text": "Solve this question.",
        "files": ["example.png"]
    }]],
    cache_examples=False,
    multimodal=True,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=8192, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

demo.launch(debug=True)