ActiveYixiao committed on
Commit
e6a7fa6
·
verified ·
1 Parent(s): c38da00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -86
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import logging
2
  import textwrap
3
  from typing import Literal, Optional
4
-
5
  import gradio as gr
6
  import outlines
7
  import pandas as pd
@@ -19,8 +18,9 @@ from transformers import (
19
  logging.basicConfig(level=logging.INFO)
20
  logger = logging.getLogger(__name__)
21
 
 
22
  DEVICE_MAP = "auto"
23
- QUANTIZATION_BITS = None
24
  TEMPERATURE = 0.0
25
 
26
  AVAILABLE_MODELS = [
@@ -37,12 +37,40 @@ AVAILABLE_MODELS = [
37
  ]
38
  DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
39
 
40
- # Use a simpler prompt format that might be closer to your training data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  PROMPT_TEMPLATE = textwrap.dedent("""
42
- Story: {story}
43
- Question: {question}
44
- Grading Scheme: {grading_scheme}
45
- Answer: {answer}
 
 
 
 
 
 
 
 
 
 
 
 
46
  Score:""").strip()
47
 
48
 
@@ -51,14 +79,9 @@ class ResponseModel(BaseModel):
51
  score: Literal["0", "1"]
52
 
53
 
54
- # Cache models to avoid reloading on every request
55
- _model_cache = {}
56
-
57
-
58
- def get_model_and_tokenizer(model_id: str, device_map: str = "auto", quantization_bits: Optional[int] = None):
59
- if model_id in _model_cache:
60
- return _model_cache[model_id]
61
-
62
  if quantization_bits == 4:
63
  quantization_config = BitsAndBytesConfig(
64
  load_in_4bit=True,
@@ -72,90 +95,86 @@ def get_model_and_tokenizer(model_id: str, device_map: str = "auto", quantizatio
72
  quantization_config = None
73
 
74
  if "longformer" in model_id:
75
- model = AutoModelForSequenceClassification.from_pretrained(model_id)
76
- tokenizer = AutoTokenizer.from_pretrained(model_id)
77
- result = (model, tokenizer, "classification")
78
- else:
79
- # For other models, use the same approach as your original script
80
- peft_config = PeftConfig.from_pretrained(model_id)
81
- base_model_id = peft_config.base_model_name_or_path
82
-
83
- model = AutoModelForCausalLM.from_pretrained(
84
- base_model_id,
85
- device_map=device_map,
86
- quantization_config=quantization_config,
87
- )
88
- model = PeftModel.from_pretrained(model, model_id)
89
- tokenizer = AutoTokenizer.from_pretrained(
90
- base_model_id, use_fast=True, clean_up_tokenization_spaces=True
91
- )
92
-
93
- # Convert to outlines model
94
- outlines_model = outlines.models.Transformers(model, tokenizer=tokenizer)
95
- result = (outlines_model, tokenizer, "generation")
96
-
97
- _model_cache[model_id] = result
98
- return result
99
 
100
 
101
  def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
102
- return PROMPT_TEMPLATE.format(
 
103
  story=story.strip(),
104
  question=question.strip(),
105
  grading_scheme=grading_scheme.strip(),
106
  answer=answer.strip(),
107
  )
 
 
 
108
 
109
 
110
  @spaces.GPU
111
  def label_single_response_with_model(model_id, story, question, criteria, response):
112
- try:
113
- prompt = format_prompt(story, question, criteria, response)
114
- model, tokenizer, model_type = get_model_and_tokenizer(model_id, DEVICE_MAP, QUANTIZATION_BITS)
115
-
116
- if model_type == "classification":
117
- # For Longformer models
118
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
119
- with torch.no_grad():
120
- logits = model(**inputs).logits
121
- predicted_class = torch.argmax(logits, dim=1).item()
122
- return str(predicted_class)
123
- else:
124
- # For generative models - using the new Outlines API
125
- generator = outlines.generate.json(model, ResponseModel)
126
- result = generator(prompt)
127
- return result.score
128
- except Exception as e:
129
- logger.error(f"Error in label_single_response_with_model: {str(e)}")
130
- return "Error: " + str(e)
131
 
132
 
133
  @spaces.GPU
134
- def label_multi_responses_with_model(model_id, story, question, criteria, response_file):
135
- try:
136
- df = pd.read_csv(response_file.name)
137
- assert "response" in df.columns, "CSV must contain a 'response' column."
138
-
139
- model, tokenizer, model_type = get_model_and_tokenizer(model_id, DEVICE_MAP, QUANTIZATION_BITS)
140
- prompts = [format_prompt(story, question, criteria, resp) for resp in df["response"]]
141
-
142
- if model_type == "classification":
143
- inputs = tokenizer(prompts, return_tensors="pt", truncation=True, padding=True)
144
- with torch.no_grad():
145
- logits = model(**inputs).logits
146
- predicted_classes = torch.argmax(logits, dim=1).tolist()
147
- scores = [str(cls) for cls in predicted_classes]
148
- else:
149
- # For generative models - using the new Outlines API
150
- generator = outlines.generate.json(model, ResponseModel)
151
- results = generator(prompts)
152
- scores = [r.score for r in results]
153
-
154
- df["score"] = scores
155
- return df
156
- except Exception as e:
157
- logger.error(f"Error in label_multi_responses_with_model: {str(e)}")
158
- return f"Error: {str(e)}"
159
 
160
 
161
  def single_response_ui(model_id):
@@ -176,7 +195,10 @@ def single_response_ui(model_id):
176
 
177
  def multi_response_ui(model_id):
178
  return gr.Interface(
179
- fn=lambda story, question, criteria, response_file: label_multi_responses_with_model(
 
 
 
180
  model_id.value, story, question, criteria, response_file
181
  ),
182
  inputs=[
@@ -196,7 +218,7 @@ with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
196
  model_selector = gr.Dropdown(
197
  label="Select Model",
198
  choices=AVAILABLE_MODELS,
199
- value=DEFAULT_MODEL_ID,
200
  )
201
  selected_model_id = gr.State(value=DEFAULT_MODEL_ID)
202
 
 
1
  import logging
2
  import textwrap
3
  from typing import Literal, Optional
 
4
  import gradio as gr
5
  import outlines
6
  import pandas as pd
 
18
  logging.basicConfig(level=logging.INFO)
19
  logger = logging.getLogger(__name__)
20
 
21
+ MODEL_ID = "rshwndsz/ft-longformer-base-4096"
22
  DEVICE_MAP = "auto"
23
+ QUANTIZATION_BITS = 4
24
  TEMPERATURE = 0.0
25
 
26
  AVAILABLE_MODELS = [
 
37
  ]
38
  DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
39
 
40
+ # Exact SYSTEM_PROMPT from training data
41
+ SYSTEM_PROMPT = textwrap.dedent("""
42
+ You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
43
+
44
+ 1. A story that was presented to participants as context
45
+ 2. The question that participants were asked to answer
46
+ 3. A grading scheme to evaluate the answers (Correct Responses:1, incorrect response:0, Incomplete response:0, Irrelevant:0)
47
+ 4. Grading examples
48
+ 5. A participant answer
49
+
50
+ Your task is to grade each answer according to the grading scheme. For each answer, you should:
51
+
52
+ 1. Carefully read and understand the answer and compare it to the grading criteria
53
+ 2. Assigning an score 1 or 0 for each answer.
54
+ """).strip()
55
+
56
+ # Exact PROMPT_TEMPLATE from training data
57
  PROMPT_TEMPLATE = textwrap.dedent("""
58
+ <Story>
59
+ {story}
60
+ </Story>
61
+
62
+ <Question>
63
+ {question}
64
+ </Question>
65
+
66
+ <GradingScheme>
67
+ {grading_scheme}
68
+ </GradingScheme>
69
+
70
+ <Answer>
71
+ {answer}
72
+ </Answer>
73
+
74
  Score:""").strip()
75
 
76
 
 
79
  score: Literal["0", "1"]
80
 
81
 
82
+ def get_outlines_model(
83
+ model_id: str, device_map: str = "auto", quantization_bits: Optional[int] = 4
84
+ ):
 
 
 
 
 
85
  if quantization_bits == 4:
86
  quantization_config = BitsAndBytesConfig(
87
  load_in_4bit=True,
 
95
  quantization_config = None
96
 
97
  if "longformer" in model_id:
98
+ hf_model = AutoModelForSequenceClassification.from_pretrained(model_id)
99
+ hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
100
+ return hf_model, hf_tokenizer
101
+
102
+ peft_config = PeftConfig.from_pretrained(model_id)
103
+ base_model_id = peft_config.base_model_name_or_path
104
+
105
+ base_model = AutoModelForCausalLM.from_pretrained(
106
+ base_model_id,
107
+ device_map=device_map,
108
+ quantization_config=quantization_config,
109
+ )
110
+ hf_model = PeftModel.from_pretrained(base_model, model_id)
111
+ hf_tokenizer = AutoTokenizer.from_pretrained(
112
+ base_model_id, use_fast=True, clean_up_tokenization_spaces=True
113
+ )
114
+
115
+ # Updated for new outlines API
116
+ model = outlines.models.Transformers(hf_model, hf_tokenizer)
117
+ return model
 
 
 
 
118
 
119
 
120
  def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
121
+ # Exact format used during training
122
+ prompt = PROMPT_TEMPLATE.format(
123
  story=story.strip(),
124
  question=question.strip(),
125
  grading_scheme=grading_scheme.strip(),
126
  answer=answer.strip(),
127
  )
128
+ # Exact concatenation used during training
129
+ full_prompt = SYSTEM_PROMPT + "\n" + prompt
130
+ return full_prompt
131
 
132
 
133
  @spaces.GPU
134
  def label_single_response_with_model(model_id, story, question, criteria, response):
135
+ prompt = format_prompt(story, question, criteria, response)
136
+
137
+ if "longformer" in model_id:
138
+ model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
139
+ inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
140
+ with torch.no_grad():
141
+ logits = model(**inputs).logits
142
+ predicted_class = torch.argmax(logits, dim=1).item()
143
+ return str(predicted_class)
144
+ else:
145
+ model = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
146
+ # Updated for new outlines API
147
+ generator = outlines.generate.json(model, ResponseModel)
148
+ result = generator(prompt)
149
+ return result.score
 
 
 
 
150
 
151
 
152
  @spaces.GPU
153
+ def label_multi_responses_with_model(
154
+ model_id, story, question, criteria, response_file
155
+ ):
156
+ df = pd.read_csv(response_file.name)
157
+ assert "response" in df.columns, "CSV must contain a 'response' column."
158
+ prompts = [
159
+ format_prompt(story, question, criteria, resp) for resp in df["response"]
160
+ ]
161
+
162
+ if "longformer" in model_id:
163
+ model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
164
+ inputs = tokenizer(prompts, return_tensors="pt", truncation=True, padding=True)
165
+ with torch.no_grad():
166
+ logits = model(**inputs).logits
167
+ predicted_classes = torch.argmax(logits, dim=1).tolist()
168
+ scores = [str(cls) for cls in predicted_classes]
169
+ else:
170
+ model = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
171
+ # Updated for new outlines API
172
+ generator = outlines.generate.json(model, ResponseModel)
173
+ results = generator(prompts)
174
+ scores = [r.score for r in results]
175
+
176
+ df["score"] = scores
177
+ return df
178
 
179
 
180
  def single_response_ui(model_id):
 
195
 
196
  def multi_response_ui(model_id):
197
  return gr.Interface(
198
+ fn=lambda story,
199
+ question,
200
+ criteria,
201
+ response_file: label_multi_responses_with_model(
202
  model_id.value, story, question, criteria, response_file
203
  ),
204
  inputs=[
 
218
  model_selector = gr.Dropdown(
219
  label="Select Model",
220
  choices=AVAILABLE_MODELS,
221
+ value=AVAILABLE_MODELS[0],
222
  )
223
  selected_model_id = gr.State(value=DEFAULT_MODEL_ID)
224