xy63 committed on
Commit
2519276
·
verified ·
1 Parent(s): a994ca2

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -183
app.py CHANGED
@@ -3,7 +3,6 @@ import spaces
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
4
  import torch
5
  from threading import Thread
6
- import re
7
 
8
  from marker.convert import convert_single_pdf
9
  from marker.output import markdown_exists, save_markdown, get_markdown_filepath
@@ -38,6 +37,7 @@ model = AutoModelForCausalLM.from_pretrained(
38
  device_map="auto"
39
  )
40
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
41
 
42
  # Define prompts
43
  SYSTEM_PROMPT_TEMPLATE = """You are an expert reviewer for AI conferences. You follow best practices and review papers according to the reviewer guidelines.
@@ -87,7 +87,8 @@ Please assign the paper a numerical rating on the following scale to indicate th
87
  2: fair
88
  1: poor
89
 
90
- **Justification for Soundness Rating:** Please provide specific reasons for your soundness score, explaining which aspects of the technical claims, methodology, or evidence support your rating.
 
91
 
92
  ## Presentation
93
  Please assign the paper a numerical rating on the following scale to indicate the quality of the presentation. This should take into account the writing style and clarity, as well as contextualization relative to prior work. Choose from the following:
@@ -96,7 +97,8 @@ Please assign the paper a numerical rating on the following scale to indicate th
96
  2: fair
97
  1: poor
98
 
99
- **Justification for Presentation Rating:** Please explain your presentation score by addressing specific aspects of writing clarity, organization, figure quality, and how well the work is positioned within existing literature.
 
100
 
101
  ## Contribution
102
  Please assign the paper a numerical rating on the following scale to indicate the quality of the overall contribution this paper makes to the research area being studied. Are the questions being asked important? Does the paper bring a significant originality of ideas and/or execution? Are the results valuable to share with the broader ICLR community? Choose from the following:
@@ -105,7 +107,8 @@ Please assign the paper a numerical rating on the following scale to indicate th
105
  2: fair
106
  1: poor
107
 
108
- **Justification for Contribution Rating:** Please justify your contribution score by explaining the significance of the research questions, the originality of the approach, and the potential impact on the field.
 
109
 
110
  ## Strengths
111
  A substantive assessment of the strengths of the paper, touching on each of the following dimensions: originality, quality, clarity, and significance. We encourage reviewers to be broad in their definitions of originality and significance. For example, originality may arise from a new definition or problem formulation, creative combinations of existing ideas, application to a new domain, or removing limitations from prior results.
@@ -140,16 +143,10 @@ Please provide an "overall score" for this submission. Choose from the following
140
  8: accept, good paper
141
  10: strong accept, should be highlighted at the conference
142
 
143
- **Justification for Overall Rating:** Please provide a comprehensive justification for your overall rating, synthesizing the key strengths and weaknesses that led to this decision. Explain how you weighted different aspects of the paper in arriving at your final score.
144
 
145
  """
146
 
147
- JUSTIFICATION_PROMPT = """Based on the review above, please provide detailed justifications for each numerical rating. For each rating section, explain WHY you gave that specific score:
148
-
149
- {missing_justifications}
150
-
151
- Please provide substantive explanations that reference specific aspects of the paper. Each justification should be 2-4 sentences explaining your reasoning."""
152
-
153
  # functions
154
  def create_messages(review_fields, paper_text):
155
  messages = [
@@ -158,100 +155,6 @@ def create_messages(review_fields, paper_text):
158
  ]
159
  return messages
160
 
161
def extract_ratings(review_text):
    """Extract the numerical ratings from a generated review.

    Each rating is searched for only inside its own ``## <Section>`` block
    (the text between that header and the next line starting with ``## ``).
    A section that contains no ``<digit>: <label>`` line is therefore left
    out of the result instead of picking up the score of a later section.

    Args:
        review_text: Markdown text of the review.

    Returns:
        A dict that may contain the keys 'soundness', 'presentation',
        'contribution' and 'overall', each mapped to the matched rating
        digits as a string. Keys whose section or rating is not found are
        omitted.
    """
    # (result key, section header text, pattern for the "<score>: <label>" line)
    sections = (
        ('soundness', 'Soundness', r'(\d):\s*\w+'),
        ('presentation', 'Presentation', r'(\d):\s*\w+'),
        ('contribution', 'Contribution', r'(\d):\s*\w+'),
        ('overall', 'Rating', r'(\d+):\s*[^#]+'),
    )
    flags = re.IGNORECASE | re.DOTALL

    ratings = {}
    for key, header, rating_pattern in sections:
        # MULTILINE lets the lookahead stop at the next header at the start
        # of a line; \Z covers the last section of the review.
        section = re.search(
            rf'## {header}\s*\n(.*?)(?=^## |\Z)',
            review_text,
            flags | re.MULTILINE,
        )
        if not section:
            continue
        rating = re.search(rating_pattern, section.group(1), flags)
        if rating:
            ratings[key] = rating.group(1)

    return ratings
188
-
189
def check_for_justifications(review_text):
    """Report which rating justifications are missing from a review.

    A justification counts as present only when the text following its
    ``Justification for <X> ...:`` label, up to the next ``## `` header,
    the next justification label, or the end of the review, contains at
    least 20 characters after stripping whitespace and ``*`` emphasis
    markers. Text belonging to later sections no longer satisfies the
    length requirement for an empty justification.

    Args:
        review_text: Markdown text of the review.

    Returns:
        A list with the names of the missing justifications, drawn from
        "Soundness", "Presentation", "Contribution" and "Overall Rating",
        in that order.
    """
    # (label searched for case-insensitively, name reported when missing)
    labels = (
        ("justification for soundness", "Soundness"),
        ("justification for presentation", "Presentation"),
        ("justification for contribution", "Contribution"),
        ("justification for overall rating", "Overall Rating"),
    )
    flags = re.IGNORECASE | re.DOTALL | re.MULTILINE

    missing = []
    for label, name in labels:
        found = re.search(
            rf'{label}.*?:(.*?)(?=^## |\*{{0,2}}justification for|\Z)',
            review_text,
            flags,
        )
        # Strip whitespace and markdown bold markers left around the label.
        body = found.group(1).strip(" \t\r\n*") if found else ""
        if len(body) < 20:
            missing.append(name)

    return missing
211
-
212
def insert_justifications(original_review, justifications_text):
    """Insert generated justifications after the matching rating lines.

    The justifications are first parsed out of ``justifications_text`` with
    one pattern per category, then each one found is inserted after the
    rating line of its section as a ``**Justification for <X> Rating:**``
    paragraph.

    The insertion uses a callable replacement so the justification text is
    copied literally. Passing it as a replacement template would make
    ``re.sub`` interpret backslashes in the model output (for example LaTeX
    such as ``\\mathcal``) as escapes or group references.

    Args:
        original_review: Markdown text of the review to extend.
        justifications_text: Model response containing the justifications.

    Returns:
        The review text with every parsed justification inserted; sections
        for which no justification was parsed are left unchanged.
    """
    # Parse justifications for each category
    patterns = {
        'soundness': r'(?:soundness|Soundness).*?justification.*?:(.*?)(?=\n\n|\n(?:Presentation|Contribution|Overall|$))',
        'presentation': r'(?:presentation|Presentation).*?justification.*?:(.*?)(?=\n\n|\n(?:Contribution|Overall|$))',
        'contribution': r'(?:contribution|Contribution).*?justification.*?:(.*?)(?=\n\n|\n(?:Overall|$))',
        'overall': r'(?:overall rating|Overall Rating).*?justification.*?:(.*?)(?=\n\n|$)'
    }

    justification_dict = {}
    for key, pattern in patterns.items():
        match = re.search(pattern, justifications_text, re.IGNORECASE | re.DOTALL)
        if match:
            justification_dict[key] = match.group(1).strip()

    # (category key, pattern capturing the section up to its rating line,
    #  name used in the inserted label)
    targets = (
        ('soundness', r'(## Soundness.*?\d:\s*\w+)\n', 'Soundness'),
        ('presentation', r'(## Presentation.*?\d:\s*\w+)\n', 'Presentation'),
        ('contribution', r'(## Contribution.*?\d:\s*\w+)\n', 'Contribution'),
        ('overall', r'(## Rating.*?\d+:\s*[^#]+)\n', 'Overall'),
    )

    review = original_review
    for key, anchor, label in targets:
        if key not in justification_dict:
            continue
        addition = f'\n\n**Justification for {label} Rating:** {justification_dict[key]}\n'
        # Bind `addition` as a default so each lambda keeps its own text.
        review = re.sub(
            anchor,
            lambda m, addition=addition: m.group(1) + addition,
            review,
            flags=re.DOTALL,
        )

    return review
254
-
255
  @spaces.GPU()
256
  def convert_file(filepath):
257
  full_text, images, out_metadata = convert_single_pdf(
@@ -278,6 +181,10 @@ def process_file(file):
278
  except spaces.zero.gradio.HTMLError as e:
279
  print(e)
280
  return "Error. GPU quota exceeded. Please return later."
 
 
 
 
281
  except Exception as e:
282
  print(traceback.format_exc())
283
  print(f"Error converting {filepath}: {e}")
@@ -285,92 +192,33 @@ def process_file(file):
285
  return paper_text
286
 
287
 
288
def _sample_completion(messages, max_new_tokens):
    """Sample one assistant reply for ``messages`` and return its decoded text."""
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors='pt'
    ).to(model.device)
    outputs = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id
    )
    # Slice off the prompt tokens so only the newly generated reply is decoded.
    return tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)


@spaces.GPU(duration=120)
def generate(paper_text, review_template):
    """Generate a review and add any rating justifications it lacks.

    The review is generated first. If ``check_for_justifications`` reports
    missing justifications, a follow-up turn asks the model for them and
    the answer is merged into the review with ``insert_justifications``.

    Args:
        paper_text: Text of the paper to review.
        review_template: Review template passed to ``create_messages``.

    Yields:
        str: When no justification is missing, the initial review once.
        Otherwise a growing prefix of the final review, one word further
        on each step, followed by the complete final review.
    """
    messages = create_messages(review_template, paper_text)

    print("Generating initial review...")
    initial_review = _sample_completion(messages, max_new_tokens=4096)

    missing_justifications = check_for_justifications(initial_review)
    if not missing_justifications:
        # Justifications are already present; return the initial review.
        yield initial_review
        return

    print(f"Missing justifications for: {missing_justifications}")
    print("Generating justifications...")

    ratings = extract_ratings(initial_review)

    # (name reported as missing, key in `ratings`, wording used in the request)
    requests = (
        ("Soundness", 'soundness', "soundness score"),
        ("Presentation", 'presentation', "presentation score"),
        ("Contribution", 'contribution', "contribution score"),
        ("Overall Rating", 'overall', "overall rating"),
    )
    # Only ask about sections that lack a justification AND have a parsed rating.
    missing_text = "".join(
        f"\n- {section} (you rated it {ratings[key]}): Explain why you gave this {wording}."
        for section, key, wording in requests
        if section in missing_justifications and key in ratings
    )

    follow_up_messages = messages + [
        {"role": "assistant", "content": initial_review},
        {"role": "user", "content": JUSTIFICATION_PROMPT.format(missing_justifications=missing_text)}
    ]
    justifications = _sample_completion(follow_up_messages, max_new_tokens=1024)

    final_review = insert_justifications(initial_review, justifications)

    # Emit cumulative prefixes so each yielded value extends the previous
    # one; yielding single words would hand the consumer only the latest word.
    for word in re.finditer(r'\S+', final_review):
        yield final_review[:word.end()]
    yield final_review
371
 
372
 
373
  # ui
 
 
 
 
 
 
 
374
  title = """<h1 align="center">OpenReviewer</h1>
375
  <div align="center">Using <a href="https://huggingface.co/maxidl/Llama-OpenReviewer-8B" target="_blank"><code>Llama-OpenReviewer-8B</code></a> - Built with Llama</div>
376
  """
@@ -389,8 +237,6 @@ Take a look at the Review Template to properly interpret the generated review. Y
389
 
390
  To obtain more than one review, just generate again.
391
 
392
- **Note:** The system will automatically add justifications for all numerical ratings if they are not initially provided.
393
-
394
  **GPU quota:** If exceeded, either sign in with your HF account or come back later. Your quota has a half-life of 2 hours.
395
 
396
  """
@@ -414,5 +260,7 @@ with gr.Blocks(theme=theme) as demo:
414
  demo.title = "OpenReviewer"
415
 
416
 
 
 
417
  if __name__ == "__main__":
418
  demo.launch()
 
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
4
  import torch
5
  from threading import Thread
 
6
 
7
  from marker.convert import convert_single_pdf
8
  from marker.output import markdown_exists, save_markdown, get_markdown_filepath
 
37
  device_map="auto"
38
  )
39
  tokenizer = AutoTokenizer.from_pretrained(model_name)
40
# TextIteratorStreamer collects extra keyword arguments via **decode_kwargs and
# forwards them to tokenizer.decode, so decode options are passed directly
# rather than wrapped in a `decode_kwargs=` dict.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
41
 
42
  # Define prompts
43
  SYSTEM_PROMPT_TEMPLATE = """You are an expert reviewer for AI conferences. You follow best practices and review papers according to the reviewer guidelines.
 
87
  2: fair
88
  1: poor
89
 
90
+ ## Soundness Explanation
91
+ Please provide specific reasons for your soundness score, explaining which aspects of the technical claims, methodology, or evidence support your rating.
92
 
93
  ## Presentation
94
  Please assign the paper a numerical rating on the following scale to indicate the quality of the presentation. This should take into account the writing style and clarity, as well as contextualization relative to prior work. Choose from the following:
 
97
  2: fair
98
  1: poor
99
 
100
+ ## Presentation Explanation
101
+ Please explain your presentation score by addressing specific aspects of writing clarity, organization, figure quality, and how well the work is positioned within existing literature.
102
 
103
  ## Contribution
104
  Please assign the paper a numerical rating on the following scale to indicate the quality of the overall contribution this paper makes to the research area being studied. Are the questions being asked important? Does the paper bring a significant originality of ideas and/or execution? Are the results valuable to share with the broader ICLR community? Choose from the following:
 
107
  2: fair
108
  1: poor
109
 
110
+ ## Contribution Explanation
111
+ Please justify your contribution score by explaining the significance of the research questions, the originality of the approach, and the potential impact on the field.
112
 
113
  ## Strengths
114
  A substantive assessment of the strengths of the paper, touching on each of the following dimensions: originality, quality, clarity, and significance. We encourage reviewers to be broad in their definitions of originality and significance. For example, originality may arise from a new definition or problem formulation, creative combinations of existing ideas, application to a new domain, or removing limitations from prior results.
 
143
  8: accept, good paper
144
  10: strong accept, should be highlighted at the conference
145
 
146
+ Please provide a comprehensive justification for your overall rating, synthesizing the key strengths and weaknesses that led to this decision. Explain how you weighted different aspects of the paper in arriving at your final score.
147
 
148
  """
149
 
 
 
 
 
 
 
150
  # functions
151
  def create_messages(review_fields, paper_text):
152
  messages = [
 
155
  ]
156
  return messages
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  @spaces.GPU()
159
  def convert_file(filepath):
160
  full_text, images, out_metadata = convert_single_pdf(
 
181
  except spaces.zero.gradio.HTMLError as e:
182
  print(e)
183
  return "Error. GPU quota exceeded. Please return later."
184
+ # except gradio.exceptions.Error as e:
185
+ # if 'GPU task aborted' in str(e):
186
+ # print(e)
187
+ # return 'GPU task aborted'
188
  except Exception as e:
189
  print(traceback.format_exc())
190
  print(f"Error converting {filepath}: {e}")
 
192
  return paper_text
193
 
194
 
195
@spaces.GPU(duration=190)
def generate(paper_text, review_template):
    """Stream a generated review for a paper.

    ``model.generate`` runs in a background thread and pushes decoded text
    into a streamer, which this generator drains.

    Args:
        paper_text: Text of the paper to review.
        review_template: Review template passed to ``create_messages``.

    Yields:
        str: The review text generated so far (cumulative), with any
        literal ``<|eot_id|>`` marker removed.
    """
    messages = create_messages(review_template, paper_text)
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors='pt'
    ).to(model.device)
    print(f"input_ids shape: {input_ids.shape}")

    # One streamer per call keeps state from one generation out of the next.
    # Decode options go in as plain keyword arguments because the streamer
    # collects them via **decode_kwargs.
    request_streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=input_ids,
        streamer=request_streamer,
        max_new_tokens=4096,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    generated_text = ""
    for new_text in request_streamer:
        generated_text += new_text
        # Kept as a safeguard in case the end-of-turn marker still appears.
        yield generated_text.replace("<|eot_id|>", "")

    # The streamer is exhausted once generation ends; reap the worker thread.
    thread.join()
211
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
 
214
  # ui
215
+ #8C1B13 red
216
+ #4D8093 blue
217
+ #767676 med grey
218
+ #EFECE3 light grey
219
+ #DDDDDD silver below red
220
+ #FFFDFA white
221
+
222
  title = """<h1 align="center">OpenReviewer</h1>
223
  <div align="center">Using <a href="https://huggingface.co/maxidl/Llama-OpenReviewer-8B" target="_blank"><code>Llama-OpenReviewer-8B</code></a> - Built with Llama</div>
224
  """
 
237
 
238
  To obtain more than one review, just generate again.
239
 
 
 
240
  **GPU quota:** If exceeded, either sign in with your HF account or come back later. Your quota has a half-life of 2 hours.
241
 
242
  """
 
260
  demo.title = "OpenReviewer"
261
 
262
 
263
+
264
+
265
  if __name__ == "__main__":
266
  demo.launch()