ActiveYixiao commited on
Commit
bd59e63
·
verified ·
1 Parent(s): 56ef409

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -52
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import logging
2
  import textwrap
3
- from typing import Literal, Optional
4
 
5
  import gradio as gr
6
  import outlines
@@ -35,7 +35,7 @@ AVAILABLE_MODELS = [
35
  DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
36
 
37
  DEVICE_MAP = "auto"
38
- QUANTIZATION_BITS = None
39
 
40
  SYSTEM_PROMPT = textwrap.dedent("""
41
  You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
@@ -69,9 +69,9 @@ class ResponseModel(BaseModel):
69
  score: Literal["0", "1"]
70
 
71
 
72
- def get_outlines_model(
73
  model_id: str, device_map: str = "auto", quantization_bits: Optional[int] = 4
74
- ):
75
  if quantization_bits == 4:
76
  quantization_config = BitsAndBytesConfig(
77
  load_in_4bit=True,
@@ -85,9 +85,14 @@ def get_outlines_model(
85
  quantization_config = None
86
 
87
  if "longformer" in model_id:
88
- hf_model = AutoModelForSequenceClassification.from_pretrained(model_id)
89
- hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
90
- return hf_model, hf_tokenizer
 
 
 
 
 
91
 
92
  peft_config = PeftConfig.from_pretrained(model_id)
93
  base_model_id = peft_config.base_model_name_or_path
@@ -97,13 +102,13 @@ def get_outlines_model(
97
  device_map=device_map,
98
  quantization_config=quantization_config,
99
  )
100
- hf_model = PeftModel.from_pretrained(base_model, model_id)
101
- hf_tokenizer = AutoTokenizer.from_pretrained(
102
  base_model_id, use_fast=True, clean_up_tokenization_spaces=True
103
  )
 
104
 
105
- model = outlines.from_transformers(hf_model, hf_tokenizer)
106
- return model
107
 
108
 
109
  def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
@@ -121,52 +126,82 @@ def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -
121
  def label_single_response_with_model(model_id, story, question, criteria, response):
122
  prompt = format_prompt(story, question, criteria, response)
123
 
124
- if "longformer" in model_id:
125
- model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
126
- inputs = tokenizer(response, return_tensors="pt", truncation=True, padding=True)
127
- with torch.no_grad():
128
- logits = model(**inputs).logits
129
-
130
- if logits.shape[1] == 1:
131
- # Regression-style: apply sigmoid threshold at 0.5
132
- score = int(torch.sigmoid(logits).item() > 0.5)
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  else:
134
- # Classification-style: argmax over 2 labels
135
- score = torch.argmax(logits, dim=1).item()
136
- return str(score)
137
-
138
- else:
139
- model = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
140
- generator = Generator(model, ResponseModel) # pass schema
141
- result = generator(prompt)
142
- return result.score
143
 
144
 
145
  @spaces.GPU
146
  def label_multi_responses_with_model(model_id, story, question, criteria, response_file):
147
- df = pd.read_csv(response_file.name)
148
- assert "response" in df.columns, "CSV must contain a 'response' column."
149
- prompts = [
150
- format_prompt(story, question, criteria, resp) for resp in df["response"]
151
- ]
152
-
153
- if "longformer" in model_id:
154
- model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
155
- inputs = tokenizer(df["response"].tolist(), return_tensors="pt", truncation=True, padding=True)
156
- with torch.no_grad():
157
- logits = model(**inputs).logits
158
- if logits.shape[1] == 1:
159
- scores = [str(int(torch.sigmoid(l) > 0.5)) for l in logits]
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  else:
161
- scores = [str(cls) for cls in torch.argmax(logits, dim=1).tolist()]
162
- else:
163
- model = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
164
- generator = Generator(model, ResponseModel)
165
- results = [generator(p) for p in prompts]
166
- scores = [r.score for r in results]
167
-
168
- df["score"] = scores
169
- return df
 
 
 
 
 
170
 
171
 
172
  with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
@@ -208,4 +243,4 @@ with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
208
  )
209
 
210
  if __name__ == "__main__":
211
- iface.launch(share=True)
 
1
  import logging
2
  import textwrap
3
+ from typing import Literal, Optional, Tuple, Union
4
 
5
  import gradio as gr
6
  import outlines
 
35
  DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
36
 
37
  DEVICE_MAP = "auto"
38
+ QUANTIZATION_BITS = 4 # Changed from None to 4 for better compatibility
39
 
40
  SYSTEM_PROMPT = textwrap.dedent("""
41
  You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
 
69
  score: Literal["0", "1"]
70
 
71
 
72
+ def get_model_and_tokenizer(
73
  model_id: str, device_map: str = "auto", quantization_bits: Optional[int] = 4
74
+ ) -> Tuple[Union[AutoModelForCausalLM, AutoModelForSequenceClassification], AutoTokenizer]:
75
  if quantization_bits == 4:
76
  quantization_config = BitsAndBytesConfig(
77
  load_in_4bit=True,
 
85
  quantization_config = None
86
 
87
  if "longformer" in model_id:
88
+ model = AutoModelForSequenceClassification.from_pretrained(
89
+ model_id,
90
+ device_map=device_map,
91
+ quantization_config=quantization_config # Added quantization for consistency
92
+ )
93
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
94
+ tokenizer.pad_token = tokenizer.eos_token # Add padding token
95
+ return model, tokenizer
96
 
97
  peft_config = PeftConfig.from_pretrained(model_id)
98
  base_model_id = peft_config.base_model_name_or_path
 
102
  device_map=device_map,
103
  quantization_config=quantization_config,
104
  )
105
+ model = PeftModel.from_pretrained(base_model, model_id)
106
+ tokenizer = AutoTokenizer.from_pretrained(
107
  base_model_id, use_fast=True, clean_up_tokenization_spaces=True
108
  )
109
+ tokenizer.pad_token = tokenizer.eos_token # Ensure padding token is set
110
 
111
+ return model, tokenizer
 
112
 
113
 
114
  def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
 
126
def label_single_response_with_model(model_id, story, question, criteria, response):
    """Grade a single free-text answer with the selected model.

    Args:
        model_id: HF model identifier; "longformer" models are treated as
            sequence classifiers, anything else as a PEFT LLM used via outlines.
        story, question, criteria: grading context merged into the prompt.
        response: the answer text to be graded.

    Returns:
        The score as a string ("0" or "1"), or an "Error: ..." message if
        anything fails (this function is a Gradio handler, so errors are
        surfaced to the UI rather than raised).
    """
    prompt = format_prompt(story, question, criteria, response)

    try:
        model, tokenizer = get_model_and_tokenizer(model_id, DEVICE_MAP, QUANTIZATION_BITS)

        if "longformer" in model_id:
            # Classifier path: encode the full prompt (Longformer supports
            # long contexts, hence max_length=4096).
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=4096,
            )
            # The model is loaded with device_map, so it may live on GPU;
            # tensors must be moved to the same device before the forward pass.
            inputs = inputs.to(model.device)
            with torch.no_grad():
                logits = model(**inputs).logits

            if logits.shape[1] == 1:
                # Regression-style head: sigmoid, then threshold at 0.5.
                score = int(torch.sigmoid(logits).item() > 0.5)
            else:
                # Classification-style head: argmax over the label dimension.
                score = torch.argmax(logits, dim=1).item()
            return str(score)
        else:
            # Generative path: constrain output to the ResponseModel schema
            # via outlines so the score is always a valid "0"/"1".
            outlines_model = outlines.from_transformers(model, tokenizer)
            generator = Generator(outlines_model, ResponseModel)
            result = generator(prompt)
            return result.score
    except Exception as e:
        # logger.exception records the full traceback; the short message is
        # returned to the UI so the user sees what went wrong.
        logger.exception("Error processing request")
        return f"Error: {str(e)}"
 
160
 
161
 
162
@spaces.GPU
def label_multi_responses_with_model(model_id, story, question, criteria, response_file):
    """Grade every answer in an uploaded CSV with the selected model.

    Args:
        model_id: HF model identifier; see label_single_response_with_model.
        story, question, criteria: grading context shared by all rows.
        response_file: uploaded file object (Gradio) whose CSV must contain
            a "response" column.

    Returns:
        The input DataFrame with an added "score" column, or a one-row
        DataFrame with an "error" column if anything fails (Gradio handler:
        errors are surfaced to the UI rather than raised).
    """
    try:
        df = pd.read_csv(response_file.name)
        # Explicit raise instead of assert: asserts are stripped under
        # `python -O`, which would silently skip this validation.
        if "response" not in df.columns:
            raise ValueError("CSV must contain a 'response' column.")

        model, tokenizer = get_model_and_tokenizer(model_id, DEVICE_MAP, QUANTIZATION_BITS)

        if "longformer" in model_id:
            # Classifier path: batch-encode one prompt per row.
            prompts = [
                format_prompt(story, question, criteria, resp)
                for resp in df["response"]
            ]
            inputs = tokenizer(
                prompts,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=4096,
            )
            # Model may be placed on GPU by device_map; keep tensors with it.
            inputs = inputs.to(model.device)
            with torch.no_grad():
                logits = model(**inputs).logits

            if logits.shape[1] == 1:
                # Regression-style head: per-row sigmoid threshold at 0.5.
                scores = [str(int(torch.sigmoid(l) > 0.5)) for l in logits]
            else:
                # Classification-style head: per-row argmax.
                scores = [str(cls) for cls in torch.argmax(logits, dim=1).tolist()]
        else:
            # Generative path: one constrained generation per row.
            outlines_model = outlines.from_transformers(model, tokenizer)
            generator = Generator(outlines_model, ResponseModel)
            scores = []
            for resp in df["response"]:
                prompt = format_prompt(story, question, criteria, resp)
                result = generator(prompt)
                scores.append(result.score)

        df["score"] = scores
        return df
    except Exception as e:
        # Full traceback to the log; compact error frame back to the UI.
        logger.exception("Error processing batch")
        return pd.DataFrame({"error": [str(e)]})
205
 
206
 
207
  with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
 
243
  )
244
 
245
  if __name__ == "__main__":
246
+ iface.launch(share=True)