zhangbaoxin committed
Commit 4142af5 · verified · 1 Parent(s): 7ac82cf
Files changed (1)
  1. app.py +86 -53
app.py CHANGED
@@ -1,70 +1,103 @@
  import gradio as gr
- from huggingface_hub import InferenceClient


- def respond(
-     message,
-     history: list[dict[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
-     hf_token: gr.OAuthToken,
- ):
      """
-     For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
      """
-     client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")

-     messages = [{"role": "system", "content": system_message}]

-     messages.extend(history)

-     messages.append({"role": "user", "content": message})

-     response = ""

-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         choices = message.choices
-         token = ""
-         if len(choices) and choices[0].delta.content:
-             token = choices[0].delta.content

-         response += token
-         yield response


- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- chatbot = gr.ChatInterface(
-     respond,
-     type="messages",
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )

- with gr.Blocks() as demo:
-     with gr.Sidebar():
-         gr.LoginButton()
-     chatbot.render()


  if __name__ == "__main__":
-     demo.launch()
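Before the rewrite, `respond` streamed by yielding the cumulative response after every delta, which `gr.ChatInterface` rendered live. Below is a minimal sketch of exercising that generator outside the UI; it assumes the old app.py's `respond` is importable, and the `HF_TOKEN` environment variable and the `SimpleNamespace` stand-in for `gr.OAuthToken` are illustrative, not part of the commit.

# Sketch, not part of the commit: drive the removed respond() generator without
# the Gradio UI. Assumes a valid token in the HF_TOKEN environment variable; the
# SimpleNamespace mimics gr.OAuthToken, of which respond() only reads `.token`.
import os
from types import SimpleNamespace

partial = ""
for partial in respond(
    message="Hello!",
    history=[],
    system_message="You are a friendly Chatbot.",
    max_tokens=64,
    temperature=0.7,
    top_p=0.95,
    hf_token=SimpleNamespace(token=os.environ["HF_TOKEN"]),
):
    pass  # each yield is the full response so far (cumulative), not one token
print(partial)  # the final, complete reply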
 
  import gradio as gr
+ import torch
+ from PIL import Image
+ import requests
+ from io import BytesIO
+ from transformers import Qwen3VLForConditionalGeneration, AutoProcessor

+ # --- Configuration ---
+ MODEL_PATH = "zhangbaoxin/qwen3-vl-2b-package_unsloth_finetune"
+ CPU_DEVICE = "cpu"

+ # --- Model and Processor Loading ---
+ print("Loading model and processor... This will take a few minutes on a CPU.")
+ processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
+ model = Qwen3VLForConditionalGeneration.from_pretrained(
+     MODEL_PATH,
+     trust_remote_code=True,
+     dtype="auto",  # "auto" loads the checkpoint in its native dtype
+     device_map="auto",  # works on CPU-only Spaces as well as on GPU
+ )
+ print("Model and processor loaded successfully.")
+
+ # --- Inference Function ---
+ def process_and_generate(image_input, text_prompt):
      """
+     Processes the image and text prompt, and generates a response from the model.
      """
+     if image_input is None or not text_prompt.strip():
+         return "Please provide both an image and a text prompt."

+     # Convert Gradio's numpy array to a PIL Image
+     pil_image = Image.fromarray(image_input)

+     # Prepare the messages payload for the model
+     messages = [
+         {
+             "role": "user",
+             "content": [
+                 {"type": "image", "image": pil_image},
+                 {"type": "text", "text": text_prompt},
+             ],
+         }
+     ]

+     print("Processing inputs and generating response... This will be slow.")
+     try:
+         # Prepare the inputs for inference
+         inputs = processor.apply_chat_template(
+             messages,
+             tokenize=True,
+             add_generation_prompt=True,
+             return_dict=True,
+             return_tensors="pt",
+         )
+         inputs = inputs.to(model.device)

+         # Inference: generate the output
+         generated_ids = model.generate(**inputs, max_new_tokens=1024)

+         # Keep only the new tokens by trimming the prompt IDs off the generated IDs
+         generated_ids_trimmed = [
+             out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+         ]
+
+         # Decode the trimmed IDs to text
+         output_text = processor.batch_decode(
+             generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+         )
+
+         # batch_decode returns a list; return its first element
+         return output_text[0]
+
+     except Exception as e:
+         return f"An error occurred during generation: {str(e)}"

+ # --- Gradio Interface ---
+ with gr.Blocks() as demo:
+     gr.Markdown(
+         """
+         # Qwen3-VL-2B-Instruct CPU Demo
+         This Space runs `zhangbaoxin/qwen3-vl-2b-package_unsloth_finetune`, a finetune of `Qwen/Qwen3-VL-2B-Instruct`, using the standard `transformers` library.
+         **Warning:** Running this on a free CPU Space is **very slow**. Duplicate this Space for a dedicated instance.
+         """
+     )

+     with gr.Row():
+         with gr.Column():
+             image_input = gr.Image(type="numpy", label="Upload Image")
+             text_prompt = gr.Textbox(label="Prompt", placeholder="e.g., Describe this image in detail.")
+             submit_button = gr.Button("Generate Response")
+         with gr.Column():
+             output_text = gr.Textbox(label="Model Output", lines=10, interactive=False)

+     submit_button.click(
+         fn=process_and_generate,
+         inputs=[image_input, text_prompt],
+         outputs=output_text,
+     )



  if __name__ == "__main__":
+     demo.launch()
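The new click handler can also be called programmatically once the demo is running. A hedged sketch follows; the address, the image path, and the `/process_and_generate` api name (which assumes Gradio's default naming from the function name, since the commit sets no explicit `api_name`) are all placeholders.

# Sketch, not part of the commit: query the running demo with gradio_client.
# Assumptions: the demo is reachable at the address below, and Gradio exposed the
# click handler under its default api_name derived from the function name.
from gradio_client import Client, handle_file

client = Client("http://127.0.0.1:7860")  # or a Space id such as "user/space-name"
reply = client.predict(
    image_input=handle_file("example.jpg"),  # placeholder image path
    text_prompt="Describe this image in detail.",
    api_name="/process_and_generate",
)
print(reply)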