ssocean committed on
Commit
ad8a799
·
verified ·
1 Parent(s): 47e27e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -25
app.py CHANGED
@@ -77,26 +77,59 @@ class PaperScorer:
77
  self.prompt_template = (
78
  "Given a certain paper, Title: {title}\nAbstract: {abstract}\nEvaluate the quality of this paper:"
79
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  def _rescale_score(self, s: float) -> float:
81
  """
82
- Piecewise linear rescaling:
83
- 0.0 ~ 0.2 -> 0.0 ~ 0.5
84
- 0.2 ~ 0.6 -> 0.5 ~ 1.0
85
- >0.6 -> 1.0
 
 
 
86
  """
87
- s = max(0.0, min(1.0, s)) # clamp input to [0,1]
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- if s <= 0.2:
90
- # scale [0,0.2] to [0,0.5]
91
- y = (s / 0.2) * 0.5
92
- elif s <= 0.6:
93
- # scale (0.2,0.6] to (0.5,1.0]
94
- y = 0.5 + ((s - 0.2) / 0.4) * 0.5
95
- else:
96
- # everything above 0.6 maps to 1.0
97
- y = 1.0
98
 
99
- return round(y, 4)
 
100
  def score(self, title: str, abstract: str) -> float:
101
  prompt = self.prompt_template.format(title=title.strip(), abstract=abstract.strip())
102
  inputs = self.tokenizer(
@@ -134,12 +167,12 @@ def predict(title, abstract, model_version):
134
 
135
  if model_version == "v1":
136
  raw, final = predict_v1(title, abstract)
137
- print(f'Raw: {raw}, Final: {final}')
138
  print(f'-------------------------------------------------------------------------------\n\n')
139
  return [["Raw Score", round(raw, 4)], ["Final Score", round(final, 4)]]
140
  else:
141
  raw, final = predict_v2(title, abstract)
142
- print(f'Raw: {raw}, Final: {final}')
143
  print(f'-------------------------------------------------------------------------------\n\n')
144
  return [["Raw Score", round(raw, 4)], ["Final Score", round(final, 4)]]
145
 
@@ -205,7 +238,7 @@ examples = [
205
  # ===== Gradio 界面 =====
206
  with gr.Blocks() as iface:
207
  gr.Markdown("""
208
- # 📈 Predict Impact & Quality of Newly Published Papers
209
 
210
  ### LLM-powered estimates from a paper’s title and abstract.
211
 
@@ -214,8 +247,10 @@ with gr.Blocks() as iface:
214
  - [**NAIPv2**](https://arxiv.org/abs/2509.25179) — predicts **paper quality** *(default)*
215
  *See the papers for methodology and evaluation details.*
216
 
217
- > ⚡ **Note:** Local inference is instantaneous. On **Hugging Face ZeroGPU**, the quantized model is reloaded **from disk** on each prediction, which can introduce minor disk-I/O delays (typically <30 s).
218
- > Since **NAIPv2** was trained with *relative supervision*, its absolute scores are most reliable for papers **within the training distribution** (e.g., ICLR-style topics). For **out-of-distribution areas** like robotics or 3D vision, it’s better to focus on the **raw score** and compare it against a domain-specific baseline.
 
 
219
  """)
220
 
221
  with gr.Row():
@@ -248,14 +283,14 @@ with gr.Blocks() as iface:
248
  label="Predicted Scores"
249
  )
250
  gr.Markdown("""
251
- ## Ethical Warnings and Important Notes
 
252
  - It is intended as a tool **for research and educational purposes only**.
253
  - Please refrain from deliberately embellishing the title and abstract to boost scores, and **avoid making false claims**.
254
- - This demo explores the use of LLMs for paper quality estimation and is not optimized against prompt injection attacks.
255
  - The **predicted value** is a probability generated by the model and **does NOT reflect the paper's true quality or novelty**.
256
- - For **NAIPv1**, a predicted influence score greater than 0.60 is considered to indicate a potentially impactful paper.
257
- - For **NAIPv2**, a predicted influence score greater than 0.50 is considered to indicate a potentially high-quality paper. *Note*: Since NAIPv2 was trained without absolute scale supervision, the 0–1 scores are rescaled by us; 0.50 indicates above the statistical average quality of AI papers; ≥0.65 is considered to reach the NeurIPS poster level; ~0.75 corresponds to the average spotlight level; ~0.85 corresponds to the average oral level.
258
- - The **author takes NO responsibility** for the prediction results.
259
  """)
260
 
261
  title_input.change(
 
77
  self.prompt_template = (
78
  "Given a certain paper, Title: {title}\nAbstract: {abstract}\nEvaluate the quality of this paper:"
79
  )
80
+ # def _rescale_score(self, s: float) -> float:
81
+ # """
82
+ # Piecewise linear rescaling:
83
+ # 0.0 ~ 0.2 -> 0.0 ~ 0.5
84
+ # 0.2 ~ 0.6 -> 0.5 ~ 1.0
85
+ # >0.6 -> 1.0
86
+ # """
87
+ # s = max(0.0, min(1.0, s)) # clamp input to [0,1]
88
+
89
+ # if s <= 0.2:
90
+ # # scale [0,0.2] to [0,0.5]
91
+ # y = (s / 0.2) * 0.5
92
+ # elif s <= 0.6:
93
+ # # scale (0.2,0.6] to (0.5,1.0]
94
+ # y = 0.5 + ((s - 0.2) / 0.4) * 0.5
95
+ # else:
96
+ # # everything above 0.6 maps to 1.0
97
+ # y = 1.0
98
+
99
+ # return round(y, 4)
100
  def _rescale_score(self, s: float) -> float:
101
  """
102
+ Piecewise linear rescaling with custom anchors.
103
+ Mapping examples:
104
+ 0.233 -> 0.465
105
+ 0.372 -> 0.608
106
+ 0.423 -> 0.714
107
+ 0.496 -> 0.786
108
+ Entire range still in [0,1].
109
  """
110
+ # clamp input
111
+ s = max(0.0, min(1.0, s))
112
+
113
+ # define anchors (x -> y)
114
+ anchors = [
115
+ (0.0, 0.0),
116
+ (0.233, 0.465),
117
+ (0.372, 0.608),
118
+ (0.423, 0.714),
119
+ (0.496, 0.786),
120
+ (1.0, 1.0),
121
+ ]
122
 
123
+ # find interval
124
+ for (x1, y1), (x2, y2) in zip(anchors, anchors[1:]):
125
+ if x1 <= s <= x2:
126
+ # linear interpolation
127
+ t = (s - x1) / (x2 - x1)
128
+ y = y1 + t * (y2 - y1)
129
+ return round(y, 4)
 
 
130
 
131
+ return 1.0
132
+
133
  def score(self, title: str, abstract: str) -> float:
134
  prompt = self.prompt_template.format(title=title.strip(), abstract=abstract.strip())
135
  inputs = self.tokenizer(
 
167
 
168
  if model_version == "v1":
169
  raw, final = predict_v1(title, abstract)
170
+ print(f'Raw Score: {raw}, Normalized Score: {final}')
171
  print(f'-------------------------------------------------------------------------------\n\n')
172
  return [["Raw Score", round(raw, 4)], ["Final Score", round(final, 4)]]
173
  else:
174
  raw, final = predict_v2(title, abstract)
175
+ print(f'Raw Score: {raw}, Normalized Score: {final}')
176
  print(f'-------------------------------------------------------------------------------\n\n')
177
  return [["Raw Score", round(raw, 4)], ["Final Score", round(final, 4)]]
178
 
 
238
  # ===== Gradio 界面 =====
239
  with gr.Blocks() as iface:
240
  gr.Markdown("""
241
+ # 📈 Predict Impact & Quality of Newborn Papers
242
 
243
  ### LLM-powered estimates from a paper’s title and abstract.
244
 
 
247
  - [**NAIPv2**](https://arxiv.org/abs/2509.25179) — predicts **paper quality** *(default)*
248
  *See the papers for methodology and evaluation details.*
249
 
250
+ > ⚡ **Note:** Local inference is instantaneous. On **Hugging Face ZeroGPU**, the quantized model is reloaded **from disk** on each prediction, which can introduce significant disk-I/O delays (typically <30 s).
251
+
252
+ > For **NAIPv2**, the output **Normalized score** may not be comparable across different domains. It is recommended to use the **Raw score** magnitude for quality estimation within the same domain.
253
+
254
  """)
255
 
256
  with gr.Row():
 
283
  label="Predicted Scores"
284
  )
285
  gr.Markdown("""
286
+ ## Important Notes
287
+ - The reported performance reflects aggregate statistical outcomes, rather than guarantees for individual instances.
288
  - It is intended as a tool **for research and educational purposes only**.
289
  - Please refrain from deliberately embellishing the title and abstract to boost scores, and **avoid making false claims**.
290
+ - This demo is an early exploration of using LLMs for paper quality estimation and is not optimized against prompt injection attacks.
291
  - The **predicted value** is a probability generated by the model and **does NOT reflect the paper's true quality or novelty**.
292
+ - For **NAIPv1**, a normalized score greater than **0.60** is considered to indicate a potentially impactful paper.
293
+ - For **NAIPv2**, normalized scores above **0.46**, **0.61**, and **0.79** correspond to the statistical means of NeurIPS rejected, Poster, and Oral papers, respectively.
 
294
  """)
295
 
296
  title_input.change(