Nihal2000 committed
Commit b4f13ec · Parent: dfdc0c0

gemma3 is not compatible with Spaces

Files changed (1):
  app.py +28 -34
app.py CHANGED
@@ -1,49 +1,43 @@
 import os
 import gradio as gr
-from huggingface_hub import InferenceClient
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
-HF_TOKEN = os.getenv("HF_TOKEN")
+model_id = "Nihal2000/gemma3-merged"
 
-def respond(message, history, system_message, max_tokens, temperature, top_p):
-    client = InferenceClient(model="Nihal2000/gemma3-merged", token=HF_TOKEN)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id)
 
-    messages = [{"role": "system", "content": system_message}]
-    messages.extend(history)
-    messages.append({"role": "user", "content": message})
+gen = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    device=0 if "CUDA_VISIBLE_DEVICES" in os.environ else -1
+)
 
-    response = ""
-    try:
-        for msg in client.chat_completion(
-            messages,
-            max_tokens=max_tokens,
-            stream=True,
-            temperature=temperature,
-            top_p=top_p,
-        ):
-            choices = msg.choices
-            token = ""
-            if len(choices) and choices[0].delta.content:
-                token = choices[0].delta.content
-            response += token
-            yield response
-    except Exception as e:
-        yield f"[Error] {str(e)}"
-
-    if not response:
-        yield "[No response from model]"
-
+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    prompt = system_message + "\n" + "\n".join(
+        [f"User: {u}\nAssistant: {a}" for u, a in history]
+    ) + f"\nUser: {message}\nAssistant:"
+    out = gen(
+        prompt,
+        max_new_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p,
+        do_sample=False,
+    )
+    # Return current assistant final response
+    response = out[0]["generated_text"][len(prompt):]
+    return response
 
 chatbot = gr.ChatInterface(
     respond,
     type="messages",
     additional_inputs=[
-        gr.Textbox(value="You are an Automotive Chatbot.", label="System message"),
+        gr.Textbox(value="You are an automotive assistant.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
-        ),
-    ],
+        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
+    ]
 )
 
 with gr.Blocks() as demo:
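
A caveat on the replacement respond(): the ChatInterface is created with type="messages", so Gradio passes history as a list of {"role": ..., "content": ...} dicts, not (user, assistant) tuples. Unpacking a two-key dict with `for u, a in history` yields the key names ("role", "content"), so past turns never actually reach the prompt. In addition, do_sample=False forces greedy decoding, which makes the temperature and top_p arguments no-ops. A minimal corrected sketch, not part of the commit, assuming the gen pipeline defined above:

def respond(message, history, system_message, max_tokens, temperature, top_p):
    # With type="messages", each history entry is a dict:
    # {"role": "user" | "assistant", "content": "..."}
    turns = []
    for msg in history:
        speaker = "User" if msg["role"] == "user" else "Assistant"
        turns.append(f"{speaker}: {msg['content']}")
    prompt = system_message + "\n" + "\n".join(turns) + f"\nUser: {message}\nAssistant:"
    out = gen(
        prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,          # sampling must be on for temperature/top_p to apply
        return_full_text=False,  # return only the completion, not prompt + completion
    )
    return out[0]["generated_text"]

Separately, checking for CUDA_VISIBLE_DEVICES is a weak GPU probe; torch.cuda.is_available() is the direct check. And if the checkpoint ships a chat template, tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) would build the prompt in the format the model was fine-tuned on, which is usually more reliable than a hand-rolled User:/Assistant: transcript.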