nambn0321 committed on
Commit
c4ef707
·
verified ·
1 Parent(s): 24b7c02

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ from peft import PeftModel, PeftConfig
4
+ import gradio as gr
5
+
6
+ # Replace this with your Hugging Face model repo ID
7
+ model_repo = "nambn0321/LLM_model"
8
+
9
+ # Load LoRA adapter config from the Hub
10
+ peft_config = PeftConfig.from_pretrained(model_repo)
11
+
12
+ # Load base model (will automatically download from Hub)
13
+ base_model = AutoModelForCausalLM.from_pretrained(
14
+ peft_config.base_model_name_or_path,
15
+ return_dict=True,
16
+ torch_dtype=torch.float16,
17
+ device_map="auto"
18
+ )
19
+
20
+ # Load adapter weights from your fine-tuned repo
21
+ model = PeftModel.from_pretrained(base_model, model_repo)
22
+
23
+ # Load tokenizer from the Hub repo
24
+ tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=False)
25
+
def generate_response(prompt, max_tokens=128, temperature=0.7, top_p=0.9):
    """Generate a sampled completion for ``prompt`` with the fine-tuned model.

    Args:
        prompt: Input text to condition generation on.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Softmax temperature used for sampling.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The decoded output text. The full sequence is decoded, so the
        original prompt is echoed at the start of the result.
    """
    # Move the tokenized inputs to wherever the model weights live.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # inference_mode() disables autograd tracking during generation —
    # without it every generated step builds gradient state for nothing,
    # inflating memory use and latency.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
36
+
37
+ iface = gr.Interface(
38
+ fn=generate_response,
39
+ inputs=[
40
+ gr.Textbox(lines=4, label="Prompt"),
41
+ gr.Slider(16, 512, value=128, step=16, label="Max Tokens"),
42
+ gr.Slider(0.1, 1.5, value=0.7, label="Temperature"),
43
+ gr.Slider(0.1, 1.0, value=0.9, label="Top-p")
44
+ ],
45
+ outputs="text",
46
+ title="Fine-Tuned LLM",
47
+ description="Interact with your fine-tuned model hosted on Hugging Face Hub."
48
+ )
49
+
50
+ iface.launch()