yamadamya committed on
Commit
9cd8054
·
verified ·
1 Parent(s): ad1f3e4

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +13 -13
  2. app.py +98 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,13 +1,13 @@
1
- ---
2
- title: ELYZA Diffusion
3
- emoji:
4
- colorFrom: blue
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 6.3.0
8
- app_file: app.py
9
- pinned: false
10
- short_description: ELYZA-Diffusion
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: ELYZA Diffusion LLM CPU Demo
3
+ emoji: 🧠
4
+ colorFrom: gray
5
+ colorTo: blue
6
+ sdk: gradio
7
+ python_version: "3.10"
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # ELYZA Diffusion LLM (CPU)
13
+ CPU-only Space demo for ELYZA Diffusion LLM.
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import gradio as gr
4
+ from transformers import AutoModel, AutoTokenizer
5
+
6
# Instruct variant by default; set the MODEL_ID environment variable to use
# a different checkpoint.
MODEL_ID = os.getenv("MODEL_ID", "elyza/ELYZA-Diffusion-Instruct-1.0-Dream-7B")

# --- Pinned to CPU ---
DEVICE = "cpu"
DTYPE = torch.float32

print(f"Starting CPU Space: DEVICE={DEVICE}, DTYPE={DTYPE}, MODEL_ID={MODEL_ID}")

# Load exactly once at startup (important): every request handled by this
# process reuses the same module-level model and tokenizer.
model = AutoModel.from_pretrained(
    MODEL_ID,
    torch_dtype=DTYPE,
    trust_remote_code=True,
)
model = model.to(DEVICE)
model = model.eval()

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
26
+
27
@torch.no_grad()
def generate(prompt, steps, max_new_tokens, temperature, top_p, alg_temp):
    """Generate a reply to a single-turn user prompt with the diffusion model.

    Args:
        prompt: User text. ``None``, empty, or whitespace-only input returns
            a message asking for a prompt without touching the model.
        steps: Forwarded as ``steps`` after being clamped to ``[4, 64]``.
        max_new_tokens: Forwarded as ``max_new_tokens`` after being clamped
            to ``[16, 256]``.
        temperature: Forwarded as a float to ``model.diffusion_generate``.
        top_p: Forwarded as a float to ``model.diffusion_generate``.
        alg_temp: Forwarded as a float to ``model.diffusion_generate``.

    Returns:
        The decoded completion (prompt tokens sliced off, special tokens
        skipped), or the "please enter a prompt" message for empty input.
    """
    prompt = (prompt or "").strip()
    if not prompt:
        return "プロンプトを入力してください。"

    # CPU inference is slow, so clamp the knobs to keep an unexpected value
    # from stalling the process.
    steps = max(4, min(int(steps), 64))
    max_new_tokens = max(16, min(int(max_new_tokens), 256))

    messages = [{"role": "user", "content": prompt}]
    inputs = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        return_dict=True,
        add_generation_prompt=True,
    )
    input_ids = inputs.input_ids.to(DEVICE)
    attention_mask = inputs.attention_mask.to(DEVICE)

    out = model.diffusion_generate(
        input_ids,
        attention_mask=attention_mask,
        steps=steps,
        max_new_tokens=max_new_tokens,
        temperature=float(temperature),
        top_p=float(top_p),
        alg="entropy",
        alg_temp=float(alg_temp),
        # Ask explicitly for the structured output: the upstream Dream
        # generation code is understood to return a bare tensor (no
        # ``.sequences`` attribute) when this flag is left at its default.
        return_dict_in_generate=True,
    )

    # Accept either return shape so decoding works whether or not the remote
    # code honours the flag above.
    sequences = getattr(out, "sequences", out)

    # Drop the prompt tokens; only the newly generated tail is returned.
    completion_ids = sequences[0][input_ids.size(1):]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)
63
+
64
# Single-page UI: prompt box, two rows of generation sliders, a button and
# an output box, all wired to ``generate``.
with gr.Blocks() as demo:
    gr.Markdown(
        "## ELYZA Diffusion LLM (CPU-only)\n"
        "- CPUは非常に遅いので、まずは steps=16 / max_new_tokens=128 で試してください。"
    )

    prompt = gr.Textbox(
        value="要点を短くまとめて、仕事の集中力を上げるコツを3つ教えてください。",
        lines=6,
        label="Prompt",
    )

    # Cost-related knobs; the slider bounds mirror the clamps applied inside
    # ``generate``.
    with gr.Row():
        steps = gr.Slider(
            minimum=4,
            maximum=64,
            value=16,
            step=1,
            label="steps (CPU recommended: 8-24)",
        )
        max_new_tokens = gr.Slider(
            minimum=16,
            maximum=256,
            value=128,
            step=1,
            label="max_new_tokens (CPU recommended: 64-160)",
        )

    # Sampling knobs.
    with gr.Row():
        temperature = gr.Slider(
            minimum=0.1, maximum=1.5, value=0.7, step=0.05, label="temperature"
        )
        top_p = gr.Slider(
            minimum=0.1, maximum=1.0, value=0.95, step=0.01, label="top_p"
        )
        alg_temp = gr.Slider(
            minimum=0.1, maximum=1.5, value=0.7, step=0.05, label="alg_temp"
        )

    run = gr.Button("Generate")
    out = gr.Textbox(label="Output", lines=14)

    # Component order must match the positional parameters of ``generate``.
    generate_inputs = [prompt, steps, max_new_tokens, temperature, top_p, alg_temp]
    run.click(fn=generate, inputs=generate_inputs, outputs=[out])

# Enable the request queue so a public Space copes a little better with
# concurrent visitors.
demo.queue(max_size=16)

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ accelerate
4
+ torch