malusama commited on
Commit
fba79e3
·
verified ·
1 Parent(s): a157459

Initial CPU Basic Gradio Space

Browse files
Files changed (4) hide show
  1. README.md +19 -5
  2. __pycache__/app.cpython-311.pyc +0 -0
  3. app.py +128 -0
  4. requirements.txt +16 -0
README.md CHANGED
@@ -1,12 +1,26 @@
1
  ---
2
- title: M2 Encoder 0.4B Space
3
- emoji: 👁
4
- colorFrom: purple
5
  colorTo: green
6
  sdk: gradio
7
- sdk_version: 6.9.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: M2-Encoder 0.4B Demo
3
+ emoji: 🖼️
4
+ colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
7
+ sdk_version: 5.20.0
8
  app_file: app.py
9
  pinned: false
10
+ short_description: Chinese image-text retrieval demo for M2-Encoder 0.4B
11
  ---
12
 
13
+ # M2-Encoder 0.4B Demo
14
+
15
+ This Space runs `malusama/M2-Encoder-0.4B` on the Hugging Face Spaces `CPU Basic` (CPU-only) hardware tier.
16
+
17
+ What it does:
18
+
19
+ - Upload one image
20
+ - Enter candidate labels in Chinese or English
21
+ - Return raw similarity scores and softmax probabilities
22
+
23
+ Notes:
24
+
25
+ - The first request after startup can be slow because the model must load on CPU.
26
+ - This demo is intended for low-frequency testing rather than production traffic.
__pycache__/app.cpython-311.pyc ADDED
Binary file (6.63 kB). View file
 
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import lru_cache
2
+ import os
3
+
4
+ import torch
5
+ from huggingface_hub import snapshot_download
6
+ from PIL import Image
7
+ from transformers import AutoModel, AutoProcessor
8
+
9
+
10
# Force the canonical Hugging Face endpoint so snapshot_download ignores any
# mirror configured via HF_ENDPOINT in the Space's environment.
os.environ["HF_ENDPOINT"] = "https://huggingface.co"


MODEL_ID = "malusama/M2-Encoder-0.4B"
# Pinned commit hash so the Space always loads the exact weights it was tested with.
MODEL_REVISION = "5b673bc65a31d72c9245ad7a161ba5a378f6ad88"
# Spaces "CPU Basic" hardware has no GPU; everything runs on CPU.
DEVICE = torch.device("cpu")
16
+
17
+
18
@lru_cache(maxsize=1)
def load_components():
    """Download the pinned model snapshot and return ``(model, processor)``.

    Cached (maxsize=1) so the slow CPU model load happens at most once per
    process; subsequent calls return the same objects.
    """
    local_dir = snapshot_download(
        repo_id=MODEL_ID,
        revision=MODEL_REVISION,
    )
    # Processor and model loads are independent; trust_remote_code is required
    # because the repo ships custom modeling code.
    processor = AutoProcessor.from_pretrained(
        local_dir,
        trust_remote_code=True,
    )
    model = AutoModel.from_pretrained(
        local_dir,
        trust_remote_code=True,
    ).to(DEVICE)
    model.eval()
    return model, processor
35
+
36
+
37
def parse_labels(text: str):
    """Split *text* into a list of labels.

    Labels may be separated by newlines and/or commas; surrounding whitespace
    is stripped and empty entries are dropped.
    """
    return [
        chunk.strip()
        for line in text.splitlines()
        for chunk in line.split(",")
        if chunk.strip()
    ]
45
+
46
+
47
def run_demo(image: Image.Image, candidate_text: str):
    """Score *image* against user-entered candidate labels.

    Returns a tuple of (summary string, rows sorted by probability descending,
    raw result dict). Raises ValueError when the image or the label list is
    missing; the image check takes precedence.
    """
    if image is None:
        raise ValueError("Please upload an image.")
    labels = parse_labels(candidate_text)
    if not labels:
        raise ValueError("Please enter at least one label.")

    model, processor = load_components()
    with torch.no_grad():
        encoded_text = processor(text=labels, return_tensors="pt")
        encoded_image = processor(images=image.convert("RGB"), return_tensors="pt")

        text_embeds = model(**encoded_text).text_embeds
        image_embeds = model(**encoded_image).image_embeds

        # NOTE(review): these are raw embedding dot products; whether the
        # embeds are pre-normalized or a logit scale should be applied is
        # model-specific — confirm against the model card.
        scores = (image_embeds @ text_embeds.t()).squeeze(0)
        probs = scores.softmax(dim=-1)

    score_list = scores.tolist()
    prob_list = probs.tolist()
    rows = sorted(
        ([label, float(s), float(p)] for label, s, p in zip(labels, score_list, prob_list)),
        key=lambda row: row[2],
        reverse=True,
    )

    best = rows[0]
    summary = f"Top match: {best[0]} ({best[2]:.4f})"
    raw = {
        "labels": labels,
        "scores": score_list,
        "probs": prob_list,
    }
    return summary, rows, raw
80
+
81
+
82
def build_demo():
    """Assemble and return the Gradio Blocks UI.

    gradio is imported lazily so merely importing this module does not require
    it; the caller handles ModuleNotFoundError.
    """
    import gradio as gr

    with gr.Blocks() as ui:
        gr.Markdown(
            """
            # M2-Encoder 0.4B

            Upload one image and enter candidate labels, one per line or comma-separated.
            This Space runs on `CPU Basic`, so the first request can be slow.
            """
        )

        with gr.Row():
            img_in = gr.Image(type="pil", label="Image")
            labels_in = gr.Textbox(
                label="Candidate Labels",
                lines=8,
                value="杰尼龟\n妙蛙种子\n小火龙\n皮卡丘",
            )

        submit = gr.Button("Run Matching", variant="primary")
        summary_box = gr.Textbox(label="Summary")
        results_table = gr.Dataframe(
            headers=["label", "score", "prob"],
            datatype=["str", "number", "number"],
            label="Results",
        )
        raw_json = gr.JSON(label="Raw Output")

        # Wire the button to the scoring function: two inputs, three outputs.
        submit.click(
            run_demo,
            inputs=[img_in, labels_in],
            outputs=[summary_box, results_table, raw_json],
        )
    return ui
118
+
119
+
120
# Build the UI at import time: Hugging Face Spaces looks for a module-level
# `demo` object. Tolerate a missing gradio install (raised from the lazy
# import inside build_demo) so the module itself stays importable.
try:
    demo = build_demo()
except ModuleNotFoundError:
    demo = None

if __name__ == "__main__":
    # When run as a script, gradio is mandatory — fail loudly instead of
    # silently doing nothing.
    if demo is None:
        raise RuntimeError("gradio is required to launch this app.")
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==5.20.0
2
+ torch
3
+ pytorch_lightning<=2.0.8
4
+ transformers==4.17.0
5
+ safetensors
6
+ Pillow
7
+ tqdm
8
+ einops
9
+ sacred
10
+ timm
11
+ torchvision
12
+ fairscale
13
+ numpy
14
+ opencv-python
15
+ sentencepiece
16
+ huggingface_hub==0.26.2