satyanayak committed on
Commit 7d089e3 · 1 Parent(s): 138eff8

PEFT model loading logic added

Files changed (2)
  1. app.py +70 -34
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,50 +1,86 @@
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel, PeftConfig
 import torch
+import os
+
+def load_model(model_id, model_type="base"):
+    try:
+        if model_type == "base":
+            tokenizer = AutoTokenizer.from_pretrained(model_id)
+            model = AutoModelForCausalLM.from_pretrained(
+                model_id,
+                torch_dtype=torch.float16,
+                device_map="auto"
+            )
+            return tokenizer, model
+        else:  # finetuned model with PEFT
+            # Load the base model first
+            base_model_id = "satyanayak/gemma-3-base"
+            tokenizer = AutoTokenizer.from_pretrained(base_model_id)
+            base_model = AutoModelForCausalLM.from_pretrained(
+                base_model_id,
+                torch_dtype=torch.float16,
+                device_map="auto"
+            )
+
+            # Load and merge the PEFT adapters
+            model = PeftModel.from_pretrained(
+                base_model,
+                model_id,
+                torch_dtype=torch.float16,
+                device_map="auto"
+            )
+            return tokenizer, model
+    except Exception as e:
+        print(f"Error loading {model_type} model: {str(e)}")
+        return None, None
 
 # Load base model and tokenizer
 base_model_id = "satyanayak/gemma-3-base"
-base_tokenizer = AutoTokenizer.from_pretrained(base_model_id)
-base_model = AutoModelForCausalLM.from_pretrained(
-    base_model_id,
-    torch_dtype=torch.float16,
-    device_map="auto"
-)
+base_tokenizer, base_model = load_model(base_model_id, "base")
 
 # Load finetuned model and tokenizer
 finetuned_model_id = "satyanayak/gemma-3-GRPO"
-finetuned_tokenizer = AutoTokenizer.from_pretrained(finetuned_model_id)
-finetuned_model = AutoModelForCausalLM.from_pretrained(
-    finetuned_model_id,
-    torch_dtype=torch.float16,
-    device_map="auto"
-)
+finetuned_tokenizer, finetuned_model = load_model(finetuned_model_id, "finetuned")
 
 def generate_base_response(prompt, max_length=512):
-    inputs = base_tokenizer(prompt, return_tensors="pt").to(base_model.device)
-    outputs = base_model.generate(
-        **inputs,
-        max_length=max_length,
-        num_return_sequences=1,
-        temperature=0.7,
-        do_sample=True,
-        pad_token_id=base_tokenizer.eos_token_id
-    )
-    response = base_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return response
+    if base_model is None or base_tokenizer is None:
+        return "Error: Base model failed to load. Please check if the model files are properly uploaded to Hugging Face."
+
+    try:
+        inputs = base_tokenizer(prompt, return_tensors="pt").to(base_model.device)
+        outputs = base_model.generate(
+            **inputs,
+            max_length=max_length,
+            num_return_sequences=1,
+            temperature=0.7,
+            do_sample=True,
+            pad_token_id=base_tokenizer.eos_token_id
+        )
+        response = base_tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return response
+    except Exception as e:
+        return f"Error generating response with base model: {str(e)}"
 
 def generate_finetuned_response(prompt, max_length=512):
-    inputs = finetuned_tokenizer(prompt, return_tensors="pt").to(finetuned_model.device)
-    outputs = finetuned_model.generate(
-        **inputs,
-        max_length=max_length,
-        num_return_sequences=1,
-        temperature=0.7,
-        do_sample=True,
-        pad_token_id=finetuned_tokenizer.eos_token_id
-    )
-    response = finetuned_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return response
+    if finetuned_model is None or finetuned_tokenizer is None:
+        return "Error: Finetuned model failed to load. Please check if the model files are properly uploaded to Hugging Face."
+
+    try:
+        inputs = finetuned_tokenizer(prompt, return_tensors="pt").to(finetuned_model.device)
+        outputs = finetuned_model.generate(
+            **inputs,
+            max_length=max_length,
+            num_return_sequences=1,
+            temperature=0.7,
+            do_sample=True,
+            pad_token_id=finetuned_tokenizer.eos_token_id
+        )
+        response = finetuned_tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return response
+    except Exception as e:
+        return f"Error generating response with finetuned model: {str(e)}"
 
 # Example prompts
 examples = [
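Note on the "# Load and merge the PEFT adapters" comment in the hunk above: `PeftModel.from_pretrained` only attaches the adapter weights to the base model, it does not merge them. If an actual merge is wanted (no adapter wrapper at inference time, so the result behaves like a plain `AutoModelForCausalLM`), `merge_and_unload()` can be chained afterwards. A minimal sketch, assuming `satyanayak/gemma-3-GRPO` is a LoRA-style adapter trained on top of `satyanayak/gemma-3-base`:

    # Sketch only: fold the PEFT adapter into the base weights before serving.
    # Assumes the adapter repo holds LoRA weights; adjust the IDs if that is not the case.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from peft import PeftModel

    base = AutoModelForCausalLM.from_pretrained(
        "satyanayak/gemma-3-base", torch_dtype=torch.float16, device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained("satyanayak/gemma-3-base")

    model = PeftModel.from_pretrained(base, "satyanayak/gemma-3-GRPO")
    model = model.merge_and_unload()  # merges the LoRA deltas into the base weights

With a merged model, both branches of `load_model` would return the same kind of object, so the two `generate_*` functions would not need model-specific handling.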
requirements.txt CHANGED
@@ -1,4 +1,5 @@
 gradio>=4.19.2
 transformers>=4.38.0
 torch>=2.2.0
-accelerate>=0.27.0
+accelerate>=0.27.0
+peft>=0.9.0