hallisky committed
Commit c9649d5 · 1 Parent(s): 7fd55a4
ADd buggy PEFT
Files changed (1):
  1. app.py +20 -4
app.py CHANGED
@@ -31,6 +31,7 @@ MODEL_PATHS = {
     "grade_more": "hallisky/lora-grade-highschool-llama-3-8b",
     "grade_less": "hallisky/lora-grade-elementary-llama-3-8b",
 }
+FIRST_MODEL = list(MODEL_PATHS.keys())[0]
 
 DESCRIPTION = """\
 # Authorship Obfuscation
@@ -47,8 +48,20 @@ if not torch.cuda.is_available():
 if torch.cuda.is_available():
     device = "cuda"
     model_id = "meta-llama/Meta-Llama-3-8B"
-    model = AutoModelForCausalLM.from_pretrained(model_id).to(device) # device_map="auto" requires accelerate
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    tokenizer = AutoTokenizer.from_pretrained(model_id, add_bos_token=True, add_eos_token=False, padding_side="left")
+    tokenizer.add_special_tokens({'pad_token': '<padding_token>'})
+
+    base_model = AutoModelForCausalLM.from_pretrained(model_id).to(device) # device_map="auto" requires accelerate
+    base_model.resize_token_embeddings(len(tokenizer)) # Resize to add pad token. Value doesn't matter
+    # Load in the first model
+    model = PeftModel.from_pretrained(base_model, MODEL_PATHS[FIRST_MODEL], adapter_name=FIRST_MODEL).to(device)
+    # Load in the rest of the models
+    for cur_adapter in MODEL_PATHS.keys():
+        if cur_adapter != FIRST_MODEL:
+            model.load_adapter(MODEL_PATHS[cur_adapter], adapter_name=cur_adapter)
+
+    model.eval()
 
 # Global variable to store the latest obfuscation result
 user_id = str(uuid4()) # Generate a unique session-specific user ID
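
This hunk loads every LoRA adapter in MODEL_PATHS onto one PeftModel, but it does not show how the app activates a particular adapter before generating. Below is a minimal sketch of how that selection could work with PEFT's `set_adapter`, assuming the adapter names registered above; it is an illustration, not code from this commit:

```python
# Sketch only: pick one of the adapters loaded above before generation.
# Assumes `model` is the PeftModel built in this hunk and that adapter names
# match the keys of MODEL_PATHS (e.g. "grade_more").
def activate_adapter(model, adapter_name: str):
    if adapter_name not in MODEL_PATHS:
        raise ValueError(f"Unknown adapter: {adapter_name}")
    model.set_adapter(adapter_name)  # switch which LoRA weights are applied
    return model

# activate_adapter(model, "grade_more")  # then call model.generate(...)
```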
@@ -98,13 +111,15 @@ def greet(input_text, length, function_words, grade_level, sarcasm, formality, v
         f"Expository: {expository}"
     )
 
+    converted_text = convert_data_to_format(input_text)
+    response = converted_text
     with torch.no_grad():
         outputs = model.generate(
-            input_ids=tokenizer(input_text, return_tensors="pt").input_ids.to(device),
+            input_ids=tokenizer(converted_text, return_tensors="pt").input_ids.to(device),
             max_length=100,
             num_return_sequences=1,
         )
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
     # Save the new obfuscation result and reset feedback
     latest_obfuscation = {
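
With the decode line commented out, `response` is still the prompt string returned by `convert_data_to_format` rather than model output, which matches the "buggy" commit message. A hedged sketch of what restoring the decode step might look like, slicing off the prompt tokens so only the generated continuation is kept (an assumption about the intended behavior, not code from this commit):

```python
import torch

# Sketch only: assumes `model`, `tokenizer`, `device`, and `converted_text`
# are the objects defined in app.py above.
input_ids = tokenizer(converted_text, return_tensors="pt").input_ids.to(device)
with torch.no_grad():
    outputs = model.generate(input_ids=input_ids, max_length=100, num_return_sequences=1)

new_tokens = outputs[0][input_ids.shape[1]:]  # drop the echoed prompt tokens
response = tokenizer.decode(new_tokens, skip_special_tokens=True)
```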
@@ -123,6 +138,7 @@ def greet(input_text, length, function_words, grade_level, sarcasm, formality, v
             "narrative": narrative,
             "expository": expository
         },
+        "input": input_text,
         "output": response,
         "feedback_rating": "No Feedback Selected",
         "feedback_text": ""
 