zhliOvO committed
Commit 641ac85 · 1 Parent(s): 411e1a7

Test commit

Files changed (2)
  1. app.py +95 -4
  2. requirements.txt +4 -0
app.py CHANGED
@@ -1,7 +1,98 @@
 
 
 
+# app.py
+import os
+import torch
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
-def greet(name):
-    return "Hello " + name + "!!"
-
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()
+HF_TOKEN = os.getenv("HF_TOKEN", None)
+
+BASE_MODEL_ID = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+PEFT_MODEL_ID = "befm/Be.FM-8B"
+
+# Fall back to the base model if peft is not installed.
+USE_PEFT = True
+try:
+    from peft import PeftModel, PeftConfig  # noqa
+except Exception:
+    USE_PEFT = False
+    print("[WARN] 'peft' not installed; running base model only.")
+
+def load_model_and_tokenizer():
+    # fp16 on GPU, fp32 on CPU.
+    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+    tok = AutoTokenizer.from_pretrained(BASE_MODEL_ID, token=HF_TOKEN)
+    if tok.pad_token is None:
+        tok.pad_token = tok.eos_token
+
+    base = AutoModelForCausalLM.from_pretrained(
+        BASE_MODEL_ID,
+        device_map="auto" if torch.cuda.is_available() else None,
+        torch_dtype=dtype,
+        token=HF_TOKEN,
+    )
+
+    if USE_PEFT:
+        try:
+            _ = PeftConfig.from_pretrained(PEFT_MODEL_ID, token=HF_TOKEN)
+            model = PeftModel.from_pretrained(base, PEFT_MODEL_ID, token=HF_TOKEN)
+            print(f"[INFO] Loaded PEFT adapter: {PEFT_MODEL_ID}")
+            return model, tok
+        except Exception as e:
+            print(f"[WARN] Failed to load PEFT adapter: {e}")
+            return base, tok
+    return base, tok
+
+model, tokenizer = load_model_and_tokenizer()
+DEVICE = model.device
+
+@torch.inference_mode()
+def generate_response(prompt: str, max_new_tokens=512, temperature=0.7, top_p=0.9) -> str:
+    enc = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
+    enc = {k: v.to(DEVICE) for k, v in enc.items()}
+    out = model.generate(
+        **enc,
+        max_new_tokens=max_new_tokens,
+        do_sample=True,
+        temperature=temperature,
+        top_p=top_p,
+        pad_token_id=tokenizer.eos_token_id,
+    )
+    return tokenizer.decode(out[0], skip_special_tokens=True)
+
+def chat_fn(message, history, system_prompt, max_new_tokens, temperature, top_p):
+    # Build a simple plain-text conversation prompt.
+    conv = []
+    if system_prompt:
+        conv.append(f"system: {system_prompt}")
+    for u, a in (history or []):
+        if u:
+            conv.append(f"user: {u}")
+        if a:
+            conv.append(f"assistant: {a}")
+    if message:
+        conv.append(f"user: {message}")
+    prompt = "\n".join(conv) + "\nassistant:"
+    reply = generate_response(
+        prompt,
+        max_new_tokens=max_new_tokens,
+        temperature=temperature,
+        top_p=top_p,
+    )
+    # Keep only the text after the final "assistant:" marker.
+    if "assistant:" in reply:
+        reply = reply.split("assistant:")[-1].strip()
+    return reply
+
+demo = gr.ChatInterface(
+    fn=chat_fn,
+    additional_inputs=[
+        gr.Textbox(label="System prompt (optional)", placeholder="You are Be.FM assistant...", lines=2),
+        gr.Slider(16, 2048, value=512, step=16, label="max_new_tokens"),
+        gr.Slider(0.1, 1.5, value=0.7, step=0.05, label="temperature"),
+        gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="top_p"),
+    ],
+    title="Be.FM-8B (PEFT) on Meta-Llama-3.1-8B-Instruct",
+    description="Chat interface using Meta-Llama-3.1-8B-Instruct with the PEFT adapter befm/Be.FM-8B.",
+)
+
+if __name__ == "__main__":
+    demo.launch()
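One caveat on the prompt format: Llama-3.1-Instruct checkpoints are trained against a specific chat template, so the plain `user:`/`assistant:` prompt built in `chat_fn` may leave quality on the table. A minimal sketch of the more idiomatic alternative, reusing the `tokenizer` loaded in app.py (the helper name `build_prompt` is ours, not part of the commit):

```python
# Sketch: build the prompt with the model's own chat template instead of the
# plain-text "user:/assistant:" format used in chat_fn above.
def build_prompt(system_prompt, history, message):
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    for u, a in (history or []):
        if u:
            messages.append({"role": "user", "content": u})
        if a:
            messages.append({"role": "assistant", "content": a})
    messages.append({"role": "user", "content": message})
    # add_generation_prompt=True appends the assistant header so the model
    # continues as the assistant instead of extending the user turn.
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
```

If you adopt this, slicing the generated ids past the prompt length (e.g. `out[0][enc["input_ids"].shape[1]:]` before decoding) isolates the reply more robustly than splitting on "assistant:".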
requirements.txt CHANGED
@@ -1 +1,5 @@
 gradio
+transformers
+torch
+accelerate
+peft
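Of these, `accelerate` is what lets transformers honor `device_map="auto"`. To sanity-check the Space outside Gradio, a minimal local smoke test (a sketch; it assumes `HF_TOKEN` is exported in the environment with access to the gated meta-llama checkpoint, and that it runs alongside app.py):

```python
# Importing app runs load_model_and_tokenizer() at module level, so the
# base model and PEFT adapter are downloaded on first run.
from app import generate_response

print(generate_response("user: Say hello in one sentence.\nassistant:", max_new_tokens=32))
```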