TaylorKaua committed
Commit 2048899 · verified · 1 Parent(s): 5bd5fe9

Update app.py

Files changed (1):
  1. app.py +136 -20
app.py CHANGED
@@ -1,28 +1,146 @@
+# app.py: robust version without LoginButton (uses the HF_TOKEN secret)
 import os
 import gradio as gr
 from huggingface_hub import InferenceClient
 
-MODEL_ID = "mradermacher/sk2decompile-struct-6.7b-GGUF"
+MODEL_ID = "LLM4Binary/sk2decompile-struct-6.7b"  # use the hosted endpoint (Inference API)
 
-def get_client(hf_token):
-    token = getattr(hf_token, "token", None) or os.getenv("HF_TOKEN")
+def get_client(hf_token_obj):
+    # hf_token_obj comes from Gradio only when OAuth is in use; otherwise we use the HF_TOKEN secret
+    token = None
+    try:
+        token = getattr(hf_token_obj, "token", None)
+    except Exception:
+        token = None
+    if not token:
+        token = os.getenv("HF_TOKEN")
     return InferenceClient(model=MODEL_ID, token=token)
 
-def respond(message, history, system_message, max_tokens, temperature, top_p, hf_token: gr.OAuthToken):
-    try:
-        client = get_client(hf_token)
-        messages = [{"role": "system", "content": system_message}]
-        messages += history
-        messages.append({"role": "user", "content": message})
-        response = ""
-        for chunk in client.chat_completion(messages=messages, max_tokens=max_tokens, stream=True,
-                                            temperature=temperature, top_p=top_p):
-            token = getattr(chunk.choices[0].delta, "content", "") if hasattr(chunk.choices[0], "delta") else ""
-            response += token
-            yield response
+def respond(message, history, *args):
+    """
+    Robust way to receive the arguments coming from ChatInterface.
+    args (in additional_inputs order):
+      0 -> system_message (Textbox)
+      1 -> max_tokens (Slider)
+      2 -> temperature (Slider)
+      3 -> top_p (Slider)
+      4 -> hf_token (when Login/OAuth exists); usually None here because the LoginButton was removed
+    """
+    # defaults
+    system_message = ""
+    try:
+        if len(args) >= 1 and args[0] is not None:
+            system_message = args[0]
+    except Exception:
+        system_message = ""
+
+    try:
+        max_tokens = int(args[1]) if len(args) >= 2 and args[1] is not None else 512
+    except Exception:
+        max_tokens = 512
+
+    try:
+        temperature = float(args[2]) if len(args) >= 3 and args[2] is not None else 0.7
+    except Exception:
+        temperature = 0.7
+
+    try:
+        top_p = float(args[3]) if len(args) >= 4 and args[3] is not None else 0.95
+    except Exception:
+        top_p = 0.95
+
+    hf_token_obj = args[4] if len(args) >= 5 else None
+
+    try:
+        client = get_client(hf_token_obj)
+    except Exception as e:
+        yield f"❌ Error creating client: {e}"
+        return
+
+    msgs = []
+    if system_message:
+        msgs.append({"role": "system", "content": system_message})
+    # history is usually a list of (user, assistant) pairs; ChatInterface may send it in different shapes
+    if history:
+        # if history already comes in role/content format, append it directly
+        for item in history:
+            if isinstance(item, dict) and "role" in item and "content" in item:
+                msgs.append(item)
+            elif isinstance(item, (list, tuple)) and len(item) == 2:
+                # item = (user_text, assistant_text); append as two messages
+                msgs.append({"role": "user", "content": item[0]})
+                msgs.append({"role": "assistant", "content": item[1]})
+    msgs.append({"role": "user", "content": message})
+
+    response = ""
+    try:
+        stream = client.chat_completion(
+            messages=msgs,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+        )
+    except Exception as e:
+        yield f"❌ Error calling chat_completion: {e}"
+        return
+
+    # simple, tolerant token extractor
+    def _get_chunk_text(chunk):
+        try:
+            if hasattr(chunk, "choices"):
+                c = chunk.choices
+                if c and len(c) > 0:
+                    delta = getattr(c[0], "delta", None)
+                    if delta:
+                        return getattr(delta, "content", "") or ""
+            if isinstance(chunk, dict):
+                if "generated_text" in chunk and chunk["generated_text"]:
+                    return chunk["generated_text"]
+                ch = chunk.get("choices", [])
+                if ch and isinstance(ch, list) and len(ch) > 0:
+                    first = ch[0]
+                    if isinstance(first, dict):
+                        d = first.get("delta")
+                        if isinstance(d, dict):
+                            return d.get("content", "") or ""
+                        msg = first.get("message")
+                        if isinstance(msg, dict):
+                            return msg.get("content", "") or ""
+            return ""
+        except Exception:
+            return ""
+
+    try:
+        for chunk in stream:
+            token = _get_chunk_text(chunk)
+            if token:
+                response += token
+                yield response
+        # if nothing came through the stream, try a synchronous call as a fallback
+        if response == "":
+            try:
+                final = client.chat_completion(messages=msgs, max_tokens=max_tokens, stream=False,
+                                               temperature=temperature, top_p=top_p)
+                # try to extract the text from the final response
+                if isinstance(final, dict) and "generated_text" in final:
+                    response = final["generated_text"]
+                elif hasattr(final, "choices") and final.choices:
+                    # try to access message/content
+                    try:
+                        response = final.choices[0].message.content
+                    except Exception:
+                        # fallback
+                        pass
+                if response:
+                    yield response
+            except Exception:
+                pass
     except Exception as e:
-        yield f"❌ Error: {e}"
+        yield f"❌ Error during streaming: {e}"
+        return
 
+# UI: note that gr.LoginButton() was removed to avoid requiring OAuth config
 chatbot = gr.ChatInterface(
     respond,
     type="messages",
@@ -31,14 +149,12 @@ chatbot = gr.ChatInterface(
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-        gr.LoginButton(),
     ],
 )
 
-demo = gr.Blocks()
-with demo:
+with gr.Blocks() as demo:
     with gr.Sidebar():
-        gr.Markdown("Log in with Hugging Face to use the gated model.")
+        gr.Markdown("Configure HF_TOKEN in Settings Secrets (optional).")
     chatbot.render()
 
 if __name__ == "__main__":
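
The streaming loop added in this commit relies on _get_chunk_text because chat_completion chunks can arrive either as objects exposing choices[0].delta.content or as plain dicts in several layouts. A minimal sketch of the shapes the extractor is written to tolerate; the literals below are hypothetical illustrations, not captured API output:

    # Hypothetical chunk shapes matching the branches of _get_chunk_text
    delta_chunk = {"choices": [{"delta": {"content": "Hel"}}]}        # dict-style streaming delta
    message_chunk = {"choices": [{"message": {"content": "Hello"}}]}  # non-streamed choice with a full message
    text_chunk = {"generated_text": "Hello"}                          # raw text-generation payload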
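
Since respond is now a plain generator taking (message, history, *args), it can also be exercised outside Gradio. A minimal smoke-test sketch, assuming HF_TOKEN is set, the model is reachable through the Inference API, and the snippet runs inside the app.py module; the prompt and history values are made up for illustration:

    # Hypothetical smoke test (not part of the commit); requires HF_TOKEN
    import os
    assert os.getenv("HF_TOKEN"), "set HF_TOKEN first"

    history = [("hi", "hello")]  # tuple-pair history; role/content dicts are accepted too
    last = ""
    for partial in respond(
        "decompile: int f(int a) { return a + 1; }",  # made-up user message
        history,
        "You are a decompiler.",  # system_message (args[0])
        128,                      # max_tokens (args[1])
        0.7,                      # temperature (args[2])
        0.95,                     # top_p (args[3])
    ):
        last = partial
    print(last)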