malusama commited on
Commit
fba79e3
·
verified ·
1 Parent(s): a157459

Initial CPU Basic Gradio Space

Browse files
Files changed (4) hide show
  1. README.md +19 -5
  2. __pycache__/app.cpython-311.pyc +0 -0
  3. app.py +128 -0
  4. requirements.txt +16 -0
README.md CHANGED
@@ -1,12 +1,26 @@
1
  ---
2
- title: M2 Encoder 0.4B Space
3
- emoji: 👁
4
- colorFrom: purple
5
  colorTo: green
6
  sdk: gradio
7
- sdk_version: 6.9.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: M2-Encoder 0.4B Demo
3
+ emoji: 🖼️
4
+ colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
7
+ sdk_version: 5.20.0
8
  app_file: app.py
9
  pinned: false
10
+ short_description: Chinese image-text retrieval demo for M2-Encoder 0.4B
11
  ---
12
 
13
+ # M2-Encoder 0.4B Demo
14
+
15
+ This Space runs `malusama/M2-Encoder-0.4B` on the Hugging Face Spaces `CPU Basic` (CPU-only) hardware tier.
16
+
17
+ What it does:
18
+
19
+ - Upload one image
20
+ - Enter candidate labels in Chinese or English
21
+ - Return raw similarity scores and softmax probabilities
22
+
23
+ Notes:
24
+
25
+ - The first request after startup can be slow because the model must load on CPU.
26
+ - This demo is intended for low-frequency testing rather than production traffic.
__pycache__/app.cpython-311.pyc ADDED
Binary file (6.63 kB). View file
 
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import lru_cache
2
+ import os
3
+
4
+ import torch
5
+ from huggingface_hub import snapshot_download
6
+ from PIL import Image
7
+ from transformers import AutoModel, AutoProcessor
8
+
9
+
10
# Force the canonical Hugging Face endpoint so snapshot_download ignores any
# mirror configured via HF_ENDPOINT in the Space's environment.
os.environ["HF_ENDPOINT"] = "https://huggingface.co"


MODEL_ID = "malusama/M2-Encoder-0.4B"
# Pinned commit hash so the Space always loads the exact weights it was tested with.
MODEL_REVISION = "5b673bc65a31d72c9245ad7a161ba5a378f6ad88"
# Spaces "CPU Basic" hardware has no GPU; everything runs on CPU.
DEVICE = torch.device("cpu")
16
+
17
+
18
@lru_cache(maxsize=1)
def load_components():
    """Download the pinned model snapshot and return ``(model, processor)``.

    Cached (maxsize=1) so the slow CPU model load happens at most once per
    process; subsequent calls return the same objects.
    """
    local_dir = snapshot_download(
        repo_id=MODEL_ID,
        revision=MODEL_REVISION,
    )
    # Processor and model loads are independent; trust_remote_code is required
    # because the repo ships custom modeling code.
    processor = AutoProcessor.from_pretrained(
        local_dir,
        trust_remote_code=True,
    )
    model = AutoModel.from_pretrained(
        local_dir,
        trust_remote_code=True,
    ).to(DEVICE)
    model.eval()
    return model, processor
35
+
36
+
37
def parse_labels(text: str):
    """Split *text* into a list of labels.

    Labels may be separated by newlines and/or commas; surrounding whitespace
    is stripped and empty entries are dropped.
    """
    return [
        chunk.strip()
        for line in text.splitlines()
        for chunk in line.split(",")
        if chunk.strip()
    ]
45
+
46
+
47
def run_demo(image: Image.Image, candidate_text: str):
    """Score *image* against user-entered candidate labels.

    Returns a tuple of (summary string, rows sorted by probability descending,
    raw result dict). Raises ValueError when the image or the label list is
    missing; the image check takes precedence.
    """
    if image is None:
        raise ValueError("Please upload an image.")
    labels = parse_labels(candidate_text)
    if not labels:
        raise ValueError("Please enter at least one label.")

    model, processor = load_components()
    with torch.no_grad():
        encoded_text = processor(text=labels, return_tensors="pt")
        encoded_image = processor(images=image.convert("RGB"), return_tensors="pt")

        text_embeds = model(**encoded_text).text_embeds
        image_embeds = model(**encoded_image).image_embeds

        # NOTE(review): these are raw embedding dot products; whether the
        # embeds are pre-normalized or a logit scale should be applied is
        # model-specific — confirm against the model card.
        scores = (image_embeds @ text_embeds.t()).squeeze(0)
        probs = scores.softmax(dim=-1)

    score_list = scores.tolist()
    prob_list = probs.tolist()
    rows = sorted(
        ([label, float(s), float(p)] for label, s, p in zip(labels, score_list, prob_list)),
        key=lambda row: row[2],
        reverse=True,
    )

    best = rows[0]
    summary = f"Top match: {best[0]} ({best[2]:.4f})"
    raw = {
        "labels": labels,
        "scores": score_list,
        "probs": prob_list,
    }
    return summary, rows, raw
80
+
81
+
82
def build_demo():
    """Assemble and return the Gradio Blocks UI.

    gradio is imported lazily so merely importing this module does not require
    it; the caller handles ModuleNotFoundError.
    """
    import gradio as gr

    with gr.Blocks() as ui:
        gr.Markdown(
            """
            # M2-Encoder 0.4B

            Upload one image and enter candidate labels, one per line or comma-separated.
            This Space runs on `CPU Basic`, so the first request can be slow.
            """
        )

        with gr.Row():
            img_in = gr.Image(type="pil", label="Image")
            labels_in = gr.Textbox(
                label="Candidate Labels",
                lines=8,
                value="杰尼龟\n妙蛙种子\n小火龙\n皮卡丘",
            )

        submit = gr.Button("Run Matching", variant="primary")
        summary_box = gr.Textbox(label="Summary")
        results_table = gr.Dataframe(
            headers=["label", "score", "prob"],
            datatype=["str", "number", "number"],
            label="Results",
        )
        raw_json = gr.JSON(label="Raw Output")

        # Wire the button to the scoring function: two inputs, three outputs.
        submit.click(
            run_demo,
            inputs=[img_in, labels_in],
            outputs=[summary_box, results_table, raw_json],
        )
    return ui
118
+
119
+
120
# Build the UI at import time: Hugging Face Spaces looks for a module-level
# `demo` object. Tolerate a missing gradio install (raised from the lazy
# import inside build_demo) so the module itself stays importable.
try:
    demo = build_demo()
except ModuleNotFoundError:
    demo = None

if __name__ == "__main__":
    # When run as a script, gradio is mandatory — fail loudly instead of
    # silently doing nothing.
    if demo is None:
        raise RuntimeError("gradio is required to launch this app.")
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==5.20.0
2
+ torch
3
+ pytorch_lightning<=2.0.8
4
+ transformers==4.17.0
5
+ safetensors
6
+ Pillow
7
+ tqdm
8
+ einops
9
+ sacred
10
+ timm
11
+ torchvision
12
+ fairscale
13
+ numpy
14
+ opencv-python
15
+ sentencepiece
16
+ huggingface_hub==0.26.2