Example88 committed on
Commit
8863b2f
Β·
0 Parent(s):

Initial deploy

Browse files
Files changed (3) hide show
  1. README.md +16 -0
  2. app.py +81 -0
  3. requirements.txt +5 -0
README.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: GLM-4.6V-Flash API
3
+ emoji: πŸš€
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 5.0.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ # GLM-4.6V-Flash API Space
14
+
15
+ This Space hosts the [zai-org/GLM-4.6V-Flash](https://huggingface.co/zai-org/GLM-4.6V-Flash) model.
16
+ It provides a Gradio interface and an API for multimodal (text + image) inference.
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from transformers import AutoProcessor, Glm4vForConditionalGeneration
import torch
from PIL import Image

# ---------------------------------------------------------------------------
# Configuration: Hugging Face model id served by this Space.
# ---------------------------------------------------------------------------
MODEL_PATH = "zai-org/GLM-4.6V-Flash"

# Load the processor and model once at import time so every request reuses
# the same in-memory weights.
print(f"Loading model: {MODEL_PATH}...")
processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
model = Glm4vForConditionalGeneration.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,  # half precision to cut memory footprint
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    device_map="auto",  # let accelerate place layers on the available device(s)
)
print("Model loaded successfully.")
def predict(image, text, history):
    """Run a single multimodal turn through the model and return its reply.

    Parameters
    ----------
    image : PIL.Image.Image | None
        Optional image for the user turn.
    text : str
        Optional text prompt for the user turn.
    history
        Chat history placeholder — intentionally ignored; only the current
        turn is formatted in this simplified demo.

    Returns
    -------
    str
        The decoded model response, or a prompt asking the user for input
        when both `image` and `text` are empty.
    """
    if not (text or image):
        return "Please upload an image or enter text."

    # Assemble the single-turn message in the structure the processor's
    # chat template expects: a list of typed content parts.
    content = []
    if image:
        content.append({"type": "image", "image": image})
    if text:
        content.append({"type": "text", "text": text})
    messages = [{"role": "user", "content": content}]

    # Tokenize via the chat template and move tensors to the model's device.
    inputs = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    # Sample a completion; no gradients are needed for inference.
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs, max_new_tokens=1024, do_sample=True, temperature=0.7
        )

    # Decode only the newly generated tokens, skipping the prompt prefix.
    prompt_len = inputs["input_ids"].shape[1]
    return processor.decode(generated_ids[0][prompt_len:], skip_special_tokens=True)
# ---------------------------------------------------------------------------
# Gradio interface: image + text in, markdown response out.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown(f"# {MODEL_PATH} API")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Image (Optional)")
            text_input = gr.Textbox(label="Message")
            submit_btn = gr.Button("Submit")
        with gr.Column():
            output = gr.Markdown(label="Response")

    # BUG FIX: the original passed the string "state" in `inputs`, but Gradio
    # event listeners require actual components — use a real gr.State to carry
    # the (currently unused) chat history into predict().
    history_state = gr.State([])
    submit_btn.click(
        fn=predict,
        inputs=[image_input, text_input, history_state],
        outputs=output,
    )

    # API documentation
    gr.Markdown("""
    ### API Usage
    You can use this Space as an API.
    Click 'Use via API' at the bottom of this page for details.
    """)

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers>=4.53.0
2
+ torch
3
+ accelerate
4
+ pillow
5
+ gradio