Balab2021 committed on
Commit
bf3d8f9
·
verified ·
1 Parent(s): f34bc87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -100
app.py CHANGED
@@ -1,114 +1,80 @@
1
- import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
  import os
5
  from typing import List, Tuple
6
 
7
  import gradio as gr
8
  import torch
9
- from dotenv import load_dotenv
10
  from transformers import AutoModelForCausalLM, AutoTokenizer
11
 
12
- load_dotenv()
13
-
14
  MODEL_ID = "Balab2021/qwen-workflow-planner-qwen2p5-lora"
15
 
16
- def get_hf_token() -> str:
17
- if not HF_TOKEN_KEYS:
18
- raise RuntimeError(
19
- "Missing HF_TOKEN_KEYS environment variable. "
20
- "Set it to one or more token env var names (comma-separated), "
21
- "for example: HF_TOKEN_KEYS=HF_TOKEN"
22
- )
23
-
24
- raw_value = HF_TOKEN_KEYS.strip().strip("\"'")
25
-
26
- # Allow HF_TOKEN_KEYS to hold a direct Hugging Face token.
27
- if raw_value.startswith("hf_"):
28
- return raw_value
29
-
30
- keys = [key.strip() for key in raw_value.split(",") if key.strip()]
31
-
32
- if not keys:
33
- raise RuntimeError(
34
- "HF_TOKEN_KEYS is empty. "
35
- "Set it to one or more token env var names, for example: HF_TOKEN"
36
- )
37
-
38
- for key in keys:
39
- token = os.getenv(key)
40
- if token:
41
- return token.strip().strip("\"'")
42
- raise RuntimeError(
43
- "Missing Hugging Face token. None of the env vars listed in "
44
- f"HF_TOKEN_KEYS contain a token value. Checked keys: {', '.join(keys)}"
45
- )
46
 
 
 
47
 
48
  def build_messages(history: List[Tuple[str, str]], user_message: str):
49
- messages = []
50
- for user_text, assistant_text in history:
51
- if user_text:
52
- messages.append({"role": "user", "content": user_text})
53
- if assistant_text:
54
- messages.append({"role": "assistant", "content": assistant_text})
55
- messages.append({"role": "user", "content": user_message})
56
- return messages
57
-
58
-
59
- def create_app():
60
- load_dotenv()
61
- token = get_hf_token()
62
-
63
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=token)
64
- model = AutoModelForCausalLM.from_pretrained(
65
- MODEL_ID,
66
- token=token,
67
- torch_dtype="auto",
68
- device_map="auto",
69
- )
70
-
71
- def chat_fn(
72
- message: str,
73
- history: List[Tuple[str, str]],
74
- temperature: float,
75
- max_new_tokens: int,
76
- ) -> str:
77
- messages = build_messages(history, message)
78
- prompt = tokenizer.apply_chat_template(
79
- messages,
80
- tokenize=False,
81
- add_generation_prompt=True,
82
- )
83
-
84
- inputs = tokenizer(prompt, return_tensors="pt")
85
- inputs = {k: v.to(model.device) for k, v in inputs.items()}
86
-
87
- with torch.no_grad():
88
- output_ids = model.generate(
89
- **inputs,
90
- max_new_tokens=max_new_tokens,
91
- temperature=temperature,
92
- do_sample=temperature > 0,
93
- pad_token_id=tokenizer.eos_token_id,
94
- )
95
-
96
- generated_ids = output_ids[0][inputs["input_ids"].shape[-1] :]
97
- response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
98
- return response
99
-
100
- demo = gr.ChatInterface(
101
- fn=chat_fn,
102
- additional_inputs=[
103
- gr.Slider(0.0, 1.5, value=0.2, step=0.05, label="Temperature"),
104
- gr.Slider(32, 2048, value=512, step=32, label="Max New Tokens"),
105
- ],
106
- title="Qwen Workflow Planner Chat",
107
- description=f"Model: {MODEL_ID}",
108
- )
109
- return demo
110
-
111
 
112
  if __name__ == "__main__":
113
- app = create_app()
114
- app.launch()
 
 
 
 
1
  import os
2
  from typing import List, Tuple
3
 
4
  import gradio as gr
5
  import torch
 
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
 
 
 
8
  MODEL_ID = "Balab2021/qwen-workflow-planner-qwen2p5-lora"
9
 
10
+ # Hugging Face Spaces automatically provides this if you set it in Secrets
11
+ HF_TOKEN = os.getenv("HF_TOKEN")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
+ if not HF_TOKEN:
14
+ raise ValueError("HF_TOKEN environment variable is missing. Please add it in Space Settings → Secrets.")
15
 
16
  def build_messages(history: List[Tuple[str, str]], user_message: str):
17
+ messages = []
18
+ for user_text, assistant_text in history:
19
+ if user_text:
20
+ messages.append({"role": "user", "content": user_text})
21
+ if assistant_text:
22
+ messages.append({"role": "assistant", "content": assistant_text})
23
+ messages.append({"role": "user", "content": user_message})
24
+ return messages
25
+
26
+
27
+ # Load model at startup
28
+ print(f"Loading model: {MODEL_ID} ...")
29
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
30
+ model = AutoModelForCausalLM.from_pretrained(
31
+ MODEL_ID,
32
+ token=HF_TOKEN,
33
+ torch_dtype="auto",
34
+ device_map="auto",
35
+ )
36
+
37
+
38
+ def chat_fn(
39
+ message: str,
40
+ history: List[Tuple[str, str]],
41
+ temperature: float,
42
+ max_new_tokens: int,
43
+ ) -> str:
44
+ messages = build_messages(history, message)
45
+
46
+ prompt = tokenizer.apply_chat_template(
47
+ messages,
48
+ tokenize=False,
49
+ add_generation_prompt=True,
50
+ )
51
+
52
+ inputs = tokenizer(prompt, return_tensors="pt")
53
+ inputs = {k: v.to(model.device) for k, v in inputs.items()}
54
+
55
+ with torch.no_grad():
56
+ output_ids = model.generate(
57
+ **inputs,
58
+ max_new_tokens=max_new_tokens,
59
+ temperature=temperature,
60
+ do_sample=temperature > 0,
61
+ pad_token_id=tokenizer.eos_token_id,
62
+ )
63
+
64
+ generated_ids = output_ids[0][inputs["input_ids"].shape[-1] :]
65
+ response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
66
+ return response
67
+
68
+
69
+ demo = gr.ChatInterface(
70
+ fn=chat_fn,
71
+ additional_inputs=[
72
+ gr.Slider(0.0, 1.5, value=0.2, step=0.05, label="Temperature"),
73
+ gr.Slider(32, 2048, value=512, step=32, label="Max New Tokens"),
74
+ ],
75
+ title="Qwen Workflow Planner Chat",
76
+ description=f"Model: {MODEL_ID}",
77
+ )
 
78
 
79
  if __name__ == "__main__":
80
+ demo.launch()