basmala12 committed on
Commit
600d68d
·
verified ·
1 Parent(s): 6c4c35e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -41
app.py CHANGED
@@ -1,49 +1,42 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
 
 
4
 
5
- def respond(
6
- message,
7
- history: list[dict[str, str]],
8
- system_message,
9
- max_tokens,
10
- temperature,
11
- top_p,
12
- hf_token: gr.OAuthToken,
13
- ):
14
- """
15
- Updated to use your fine-tuned model:
16
- basmala12/smollm_finetuning5
17
- """
18
 
19
- # Load your fine-tuned model through HF Inference API
20
- client = InferenceClient(
21
- token=hf_token.token,
22
- model="basmala12/smollm_finetuning5" # <- your model
23
- )
 
24
 
25
- # Build chat messages
26
  messages = [{"role": "system", "content": system_message}]
27
  messages.extend(history)
28
  messages.append({"role": "user", "content": message})
29
 
30
- response = ""
31
-
32
- # Streaming response from HF Inference API
33
- for msg in client.chat_completion(
34
  messages,
35
- max_tokens=max_tokens,
36
- stream=True,
 
 
 
 
 
37
  temperature=temperature,
38
  top_p=top_p,
39
- ):
40
- choices = msg.choices
41
- token = ""
42
- if len(choices) and choices[0].delta.content:
43
- token = choices[0].delta.content
 
44
 
45
- response += token
46
- yield response
47
 
48
 
49
  chatbot = gr.ChatInterface(
@@ -52,19 +45,17 @@ chatbot = gr.ChatInterface(
52
  additional_inputs=[
53
  gr.Textbox(
54
  value="Give short answers with brief logical reasoning.",
55
- label="System message"
56
  ),
57
- gr.Slider(minimum=1, maximum=2048, value=256, step=1, label="Max new tokens"),
58
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
59
- gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-p"),
60
  ],
61
  )
62
 
63
- with gr.Blocks() as demo:
64
- with gr.Sidebar():
65
- gr.LoginButton() # user logs in with their HF account (required for private models)
66
  chatbot.render()
67
 
68
-
69
  if __name__ == "__main__":
70
  demo.launch()
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
 
4
+ MODEL_NAME = "basmala12/smollm_finetuning5"
5
 
6
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
7
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ pipe = pipeline(
10
+ "text-generation",
11
+ model=model,
12
+ tokenizer=tokenizer,
13
+ max_new_tokens=200,
14
+ )
15
 
16
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate the assistant's reply for one chat turn.

    Args:
        message: Latest user message.
        history: Earlier turns, appended to the prompt unchanged (expected to
            be role/content dicts that the chat template accepts).
        system_message: Text placed in the leading system turn.
        max_tokens: Upper bound on newly generated tokens for this reply.
        temperature: Sampling temperature passed to the pipeline.
        top_p: Nucleus-sampling threshold passed to the pipeline.

    Returns:
        The generated reply with surrounding whitespace removed.
    """
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Request only the continuation. Splitting the full text on a literal
    # "<|im_start|>assistant" marker returns the wrong span when the user
    # message or the generated text contains that marker, and returns the
    # whole prompt when the model's template is not ChatML.
    generations = pipe(
        prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        return_full_text=False,
    )
    answer = generations[0]["generated_text"]

    # Keep stripping a stray end-of-turn marker in case it is decoded as text.
    return answer.replace("<|im_end|>", "").strip()
 
40
 
41
 
42
  chatbot = gr.ChatInterface(
 
45
  additional_inputs=[
46
  gr.Textbox(
47
  value="Give short answers with brief logical reasoning.",
48
+ label="System Message"
49
  ),
50
+ gr.Slider(1, 1024, value=256, step=1, label="Max new tokens"),
51
+ gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
52
+ gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
53
  ],
54
  )
55
 
56
+ demo = gr.Blocks()
57
+ with demo:
 
58
  chatbot.render()
59
 
 
60
  if __name__ == "__main__":
61
  demo.launch()