Abeersherif committed on
Commit
2797db8
·
verified ·
1 Parent(s): 3dc1295

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -47
app.py CHANGED
@@ -1,76 +1,56 @@
1
  import gradio as gr
2
- import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
4
 
5
- # ==============================
6
- # CONFIG: YOUR MODEL HERE
7
- # ==============================
8
- MODEL_NAME = "smol-medical-meadow-FT" # <--- change if needed
9
 
10
-
11
- # ==============================
12
- # LOAD MODEL + TOKENIZER
13
- # ==============================
14
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
15
- if tokenizer.pad_token is None:
16
- tokenizer.pad_token = tokenizer.eos_token
17
-
18
- model = AutoModelForCausalLM.from_pretrained(
19
- MODEL_NAME,
20
- torch_dtype=torch.float32
21
- )
22
 
23
  pipe = pipeline(
24
  "text-generation",
25
  model=model,
26
  tokenizer=tokenizer,
27
- return_full_text=False, # only return assistant's continuation
28
  )
29
 
30
-
31
- # ==============================
32
- # CHAT FUNCTION
33
- # ==============================
34
  def respond(message, history, system_message, max_tokens, temperature, top_p):
35
-
36
- # Build the plain conversation (SmolLM2 style)
37
- prompt = f"System: {system_message}\n\n"
38
-
39
- for turn in history:
40
- prompt += f"User: {turn['user']}\n"
41
- prompt += f"Assistant: {turn['assistant']}\n"
42
-
43
- prompt += f"User: {message}\nAssistant:"
44
-
45
- # Run generation
46
- response = pipe(
 
 
47
  prompt,
48
  max_new_tokens=max_tokens,
49
  temperature=temperature,
50
  top_p=top_p,
51
  do_sample=True,
52
- eos_token_id=tokenizer.eos_token_id,
53
  )[0]["generated_text"]
54
 
55
- # Clean trailing text
56
- # Stop if it starts generating new questions
57
- for stop in ["User:", "System:", "Q:", "\n\n"]:
58
- if stop in response:
59
- response = response.split(stop)[0].strip()
60
-
61
- return response.strip()
62
 
 
63
 
64
- # ==============================
65
- # GRADIO UI
66
- # ==============================
67
  chatbot = gr.ChatInterface(
68
  fn=respond,
69
  type="messages",
70
  additional_inputs=[
71
- gr.Textbox("You are a careful medical assistant. Answer clearly and safely.", label="System message"),
72
- gr.Slider(10, 512, value=150, step=5, label="Max new tokens"),
73
- gr.Slider(0.1, 2.0, value=0.7, step=0.05, label="Temperature"),
74
  gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
75
  ],
76
  )
 
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Change this to your model path / HF repo
# e.g. "basmala12/smol-medical-meadow-FT" if it's on Hugging Face
MODEL_NAME = "smol-medical-meadow-FT"

# Load model & tokenizer once at import time so every chat request reuses
# the same in-memory weights instead of reloading per call.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Many causal-LM checkpoints ship without a pad token; fall back to EOS so
# generation with padding/batching doesn't fail at runtime.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # Return only the generated continuation, not the echoed prompt.
    # Without this, callers must strip the prompt by hand and the raw
    # conversation text can leak into the chat window.
    return_full_text=False,
)
17
 
 
 
 
 
18
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Chat callback for gr.ChatInterface (type="messages").

    Args:
        message: Latest user message (str).
        history: Prior turns as a list of {"role": ..., "content": ...}
            dicts — the "messages" format ChatInterface supplies.
        system_message: System prompt prepended to the conversation.
        max_tokens: Cap on newly generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The assistant's reply text only (prompt stripped).
    """
    # Build chat-style messages: system prompt, prior turns, new user turn.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    # Render with the model's own chat template so role markers match
    # whatever format the tokenizer was trained with.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Generate. return_full_text=False makes the pipeline return only the
    # continuation — without it, models whose chat template is NOT ChatML
    # (no "<|im_start|>" markers) would have the entire prompt, including
    # the system message and history, echoed back into the chat reply.
    out = pipe(
        prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        return_full_text=False,
    )[0]["generated_text"]

    # Defensive cleanup for ChatML-style templates: drop anything before
    # the assistant marker and strip end-of-turn tokens if the tokenizer
    # did not already remove them.
    if "<|im_start|>assistant" in out:
        out = out.split("<|im_start|>assistant", 1)[-1]
    out = out.replace("<|im_end|>", "").strip()

    return out
46
 
 
 
 
# Gradio chat UI.  type="messages" makes `history` arrive as a list of
# {"role": ..., "content": ...} dicts, matching what respond() expects.
chatbot = gr.ChatInterface(
    fn=respond,
    type="messages",
    additional_inputs=[
        gr.Textbox("Give short answers with brief logical reasoning.", label="System message"),
        gr.Slider(1, 512, value=256, step=1, label="Max new tokens"),
        gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
    ],
)

# Without launch() the script builds the UI but never serves it when run
# directly (python app.py); guard so imports don't start a server.
if __name__ == "__main__":
    chatbot.launch()