ssocean committed on
Commit
f6b8d98
·
verified ·
1 Parent(s): 8f72d39

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -7
app.py CHANGED
@@ -57,15 +57,42 @@ class PaperScorer:
57
  self.prompt_template = (
58
  "Given a certain paper, Title: {title}\nAbstract: {abstract}\nEvaluate the quality of this paper:"
59
  )
60
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  def score(self, title: str, abstract: str) -> float:
62
  prompt = self.prompt_template.format(title=title.strip(), abstract=abstract.strip())
63
- inputs = self.tokenizer(prompt, return_tensors='pt', padding=True, truncation=True,
64
- max_length=self.max_length).to(self.device)
 
 
 
 
 
 
65
  with torch.no_grad():
66
- logits = self.model(**inputs).logits + 2.0 # # NAIPv2 only learned relative magnitudes. To make the values more comparable, we added a compensation value derived from statistics to the output of v2.
67
- score = torch.sigmoid(logits).view(-1).item()
68
- return round(score, 4)
 
 
69
 
70
  @spaces.GPU(duration=60, enable_queue=True)
71
  def predict_v2(title, abstract):
@@ -187,8 +214,15 @@ with gr.Blocks() as iface:
187
  - It is intended as a tool **for research and educational purposes only**.
188
  - Please refrain from deliberately embellishing the title and abstract to boost scores, and **avoid making false claims**.
189
  - The **predicted value** is a probability generated by the model and **does NOT reflect the paper's true quality or novelty**.
190
- - Empirically, it is considered that a predicted influence score greater than 0.60 in v1 indicates a potentially impactful paper, while a score greater than 0.5 in v2 indicates a high-quality paper.
191
  - The **author takes NO responsibility** for the prediction results.
 
 
 
 
 
 
 
 
192
  """)
193
 
194
  title_input.change(
 
57
  self.prompt_template = (
58
  "Given a certain paper, Title: {title}\nAbstract: {abstract}\nEvaluate the quality of this paper:"
59
  )
60
def _rescale_score(self, s: float) -> float:
    """Rescale a raw sigmoid score onto the displayed 0-1 scale.

    Piecewise-linear mapping, applied after clamping ``s`` to [0, 1]:

    * ``[0.0, 0.2]`` -> ``[0.0, 0.5]``
    * ``(0.2, 0.6]`` -> ``(0.5, 1.0]``
    * ``> 0.6``      -> ``1.0``

    Args:
        s: Raw score; values outside [0, 1] are clamped before mapping.

    Returns:
        The rescaled score rounded to four decimals, or NaN when ``s``
        is NaN.
    """
    import math

    # NaN compares false against everything, so the min/max clamp below
    # would silently turn it into 1.0 and present a broken model output
    # as the highest possible score. Propagate it instead.
    if math.isnan(s):
        return float("nan")

    s = max(0.0, min(1.0, s))  # clamp input to [0, 1]

    if s <= 0.2:
        # scale [0, 0.2] to [0, 0.5]
        y = (s / 0.2) * 0.5
    elif s <= 0.6:
        # scale (0.2, 0.6] to (0.5, 1.0]
        y = 0.5 + ((s - 0.2) / 0.4) * 0.5
    else:
        # everything above 0.6 saturates at 1.0
        y = 1.0

    return round(y, 4)
80
def score(self, title: str, abstract: str) -> float:
    """Score one paper from its title and abstract.

    Fills ``self.prompt_template`` with the stripped title and abstract,
    tokenizes the prompt, runs ``self.model`` without gradient tracking,
    applies a sigmoid to the logits and passes the resulting value
    through ``self._rescale_score``.

    Args:
        title: Paper title; surrounding whitespace is stripped.
        abstract: Paper abstract; surrounding whitespace is stripped.

    Returns:
        The rescaled score rounded to four decimals.
    """
    text = self.prompt_template.format(
        title=title.strip(),
        abstract=abstract.strip(),
    )

    encoded = self.tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=self.max_length,
    )
    encoded = encoded.to(self.device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = self.model(**encoded)
        probability = torch.sigmoid(output.logits).view(-1).item()

    return round(self._rescale_score(probability), 4)
96
 
97
  @spaces.GPU(duration=60, enable_queue=True)
98
  def predict_v2(title, abstract):
 
214
  - It is intended as a tool **for research and educational purposes only**.
215
  - Please refrain from deliberately embellishing the title and abstract to boost scores, and **avoid making false claims**.
216
  - The **predicted value** is a probability generated by the model and **does NOT reflect the paper's true quality or novelty**.
 
217
  - The **author takes NO responsibility** for the prediction results.
218
+ - This demo explores LLMs for paper quality assessment, but is **not optimized against prompt injection attacks**.
219
+ - For **NAIPv1**, a predicted influence score greater than 0.60 is considered to indicate a potentially impactful paper.
220
+ - For **NAIPv2**, a predicted influence score greater than 0.50 is considered to indicate a potentially high-quality paper.
221
+ *Note*: Since NAIPv2 was trained without absolute scale supervision, the 0–1 scores are rescaled.
222
+ - A score of 0.50 indicates quality above the statistical average of AI papers.
223
+ - >0.65 is considered to reach the NeurIPS poster level.
224
+ - ~0.75 corresponds to the average NeurIPS spotlight level.
225
+ - ~0.85 corresponds to the average NeurIPS oral level.
226
  """)
227
 
228
  title_input.change(