Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -77,26 +77,59 @@ class PaperScorer:
|
|
| 77 |
self.prompt_template = (
|
| 78 |
"Given a certain paper, Title: {title}\nAbstract: {abstract}\nEvaluate the quality of this paper:"
|
| 79 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
def _rescale_score(self, s: float) -> float:
|
| 81 |
"""
|
| 82 |
-
Piecewise linear rescaling
|
| 83 |
-
|
| 84 |
-
0.
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
| 86 |
"""
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
# everything above 0.6 maps to 1.0
|
| 97 |
-
y = 1.0
|
| 98 |
|
| 99 |
-
return
|
|
|
|
| 100 |
def score(self, title: str, abstract: str) -> float:
|
| 101 |
prompt = self.prompt_template.format(title=title.strip(), abstract=abstract.strip())
|
| 102 |
inputs = self.tokenizer(
|
|
@@ -134,12 +167,12 @@ def predict(title, abstract, model_version):
|
|
| 134 |
|
| 135 |
if model_version == "v1":
|
| 136 |
raw, final = predict_v1(title, abstract)
|
| 137 |
-
print(f'Raw: {raw},
|
| 138 |
print(f'-------------------------------------------------------------------------------\n\n')
|
| 139 |
return [["Raw Score", round(raw, 4)], ["Final Score", round(final, 4)]]
|
| 140 |
else:
|
| 141 |
raw, final = predict_v2(title, abstract)
|
| 142 |
-
print(f'Raw: {raw},
|
| 143 |
print(f'-------------------------------------------------------------------------------\n\n')
|
| 144 |
return [["Raw Score", round(raw, 4)], ["Final Score", round(final, 4)]]
|
| 145 |
|
|
@@ -205,7 +238,7 @@ examples = [
|
|
| 205 |
# ===== Gradio 界面 =====
|
| 206 |
with gr.Blocks() as iface:
|
| 207 |
gr.Markdown("""
|
| 208 |
-
# 📈 Predict Impact & Quality of
|
| 209 |
|
| 210 |
### LLM-powered estimates from a paper’s title and abstract.
|
| 211 |
|
|
@@ -214,8 +247,10 @@ with gr.Blocks() as iface:
|
|
| 214 |
- [**NAIPv2**](https://arxiv.org/abs/2509.25179) — predicts **paper quality** *(default)*
|
| 215 |
*See the papers for methodology and evaluation details.*
|
| 216 |
|
| 217 |
-
> ⚡ **Note:** Local inference is instantaneous. On **Hugging Face ZeroGPU**, the quantized model is reloaded **from disk** on each prediction, which can introduce
|
| 218 |
-
|
|
|
|
|
|
|
| 219 |
""")
|
| 220 |
|
| 221 |
with gr.Row():
|
|
@@ -248,14 +283,14 @@ with gr.Blocks() as iface:
|
|
| 248 |
label="Predicted Scores"
|
| 249 |
)
|
| 250 |
gr.Markdown("""
|
| 251 |
-
##
|
|
|
|
| 252 |
- It is intended as a tool **for research and educational purposes only**.
|
| 253 |
- Please refrain from deliberately embellishing the title and abstract to boost scores, and **avoid making false claims**.
|
| 254 |
-
- This demo
|
| 255 |
- The **predicted value** is a probability generated by the model and **does NOT reflect the paper's true quality or novelty**.
|
| 256 |
-
- For **NAIPv1**, a
|
| 257 |
-
- For **NAIPv2**,
|
| 258 |
-
- The **author takes NO responsibility** for the prediction results.
|
| 259 |
""")
|
| 260 |
|
| 261 |
title_input.change(
|
|
|
|
| 77 |
self.prompt_template = (
|
| 78 |
"Given a certain paper, Title: {title}\nAbstract: {abstract}\nEvaluate the quality of this paper:"
|
| 79 |
)
|
| 80 |
+
# def _rescale_score(self, s: float) -> float:
|
| 81 |
+
# """
|
| 82 |
+
# Piecewise linear rescaling:
|
| 83 |
+
# 0.0 ~ 0.2 -> 0.0 ~ 0.5
|
| 84 |
+
# 0.2 ~ 0.6 -> 0.5 ~ 1.0
|
| 85 |
+
# >0.6 -> 1.0
|
| 86 |
+
# """
|
| 87 |
+
# s = max(0.0, min(1.0, s)) # clamp input to [0,1]
|
| 88 |
+
|
| 89 |
+
# if s <= 0.2:
|
| 90 |
+
# # scale [0,0.2] to [0,0.5]
|
| 91 |
+
# y = (s / 0.2) * 0.5
|
| 92 |
+
# elif s <= 0.6:
|
| 93 |
+
# # scale (0.2,0.6] to (0.5,1.0]
|
| 94 |
+
# y = 0.5 + ((s - 0.2) / 0.4) * 0.5
|
| 95 |
+
# else:
|
| 96 |
+
# # everything above 0.6 maps to 1.0
|
| 97 |
+
# y = 1.0
|
| 98 |
+
|
| 99 |
+
# return round(y, 4)
|
| 100 |
def _rescale_score(self, s: float) -> float:
|
| 101 |
"""
|
| 102 |
+
Piecewise linear rescaling with custom anchors.
|
| 103 |
+
Mapping examples:
|
| 104 |
+
0.233 -> 0.465
|
| 105 |
+
0.372 -> 0.608
|
| 106 |
+
0.423 -> 0.714
|
| 107 |
+
0.496 -> 0.786
|
| 108 |
+
Entire range still in [0,1].
|
| 109 |
"""
|
| 110 |
+
# clamp input
|
| 111 |
+
s = max(0.0, min(1.0, s))
|
| 112 |
+
|
| 113 |
+
# define anchors (x -> y)
|
| 114 |
+
anchors = [
|
| 115 |
+
(0.0, 0.0),
|
| 116 |
+
(0.233, 0.465),
|
| 117 |
+
(0.372, 0.608),
|
| 118 |
+
(0.423, 0.714),
|
| 119 |
+
(0.496, 0.786),
|
| 120 |
+
(1.0, 1.0),
|
| 121 |
+
]
|
| 122 |
|
| 123 |
+
# find interval
|
| 124 |
+
for (x1, y1), (x2, y2) in zip(anchors, anchors[1:]):
|
| 125 |
+
if x1 <= s <= x2:
|
| 126 |
+
# linear interpolation
|
| 127 |
+
t = (s - x1) / (x2 - x1)
|
| 128 |
+
y = y1 + t * (y2 - y1)
|
| 129 |
+
return round(y, 4)
|
|
|
|
|
|
|
| 130 |
|
| 131 |
+
return 1.0
|
| 132 |
+
|
| 133 |
def score(self, title: str, abstract: str) -> float:
|
| 134 |
prompt = self.prompt_template.format(title=title.strip(), abstract=abstract.strip())
|
| 135 |
inputs = self.tokenizer(
|
|
|
|
| 167 |
|
| 168 |
if model_version == "v1":
|
| 169 |
raw, final = predict_v1(title, abstract)
|
| 170 |
+
print(f'Raw Score: {raw}, Normalized Score: {final}')
|
| 171 |
print(f'-------------------------------------------------------------------------------\n\n')
|
| 172 |
return [["Raw Score", round(raw, 4)], ["Final Score", round(final, 4)]]
|
| 173 |
else:
|
| 174 |
raw, final = predict_v2(title, abstract)
|
| 175 |
+
print(f'Raw Score: {raw}, Normalized Score: {final}')
|
| 176 |
print(f'-------------------------------------------------------------------------------\n\n')
|
| 177 |
return [["Raw Score", round(raw, 4)], ["Final Score", round(final, 4)]]
|
| 178 |
|
|
|
|
| 238 |
# ===== Gradio 界面 =====
|
| 239 |
with gr.Blocks() as iface:
|
| 240 |
gr.Markdown("""
|
| 241 |
+
# 📈 Predict Impact & Quality of Newborn Papers
|
| 242 |
|
| 243 |
### LLM-powered estimates from a paper’s title and abstract.
|
| 244 |
|
|
|
|
| 247 |
- [**NAIPv2**](https://arxiv.org/abs/2509.25179) — predicts **paper quality** *(default)*
|
| 248 |
*See the papers for methodology and evaluation details.*
|
| 249 |
|
| 250 |
+
> ⚡ **Note:** Local inference is instantaneous. On **Hugging Face ZeroGPU**, the quantized model is reloaded **from disk** on each prediction, which can introduce significant disk-I/O delays (typically <30 s).
|
| 251 |
+
|
| 252 |
+
> For **NAIPv2**, the output **Normalized score** may not be comparable across different domains. It is recommended to use the **Raw score** magnitude for quality estimation within the same domain.
|
| 253 |
+
|
| 254 |
""")
|
| 255 |
|
| 256 |
with gr.Row():
|
|
|
|
| 283 |
label="Predicted Scores"
|
| 284 |
)
|
| 285 |
gr.Markdown("""
|
| 286 |
+
## Important Notes
|
| 287 |
+
- The reported performance reflects aggregate statistical outcomes, rather than guarantees for individual instances.
|
| 288 |
- It is intended as a tool **for research and educational purposes only**.
|
| 289 |
- Please refrain from deliberately embellishing the title and abstract to boost scores, and **avoid making false claims**.
|
| 290 |
+
- This demo is an early exploration of using LLMs for paper quality estimation and is not optimized against prompt injection attacks.
|
| 291 |
- The **predicted value** is a probability generated by the model and **does NOT reflect the paper's true quality or novelty**.
|
| 292 |
+
- For **NAIPv1**, a normalized score greater than **0.60** is considered to indicate a potentially impactful paper.
|
| 293 |
+
- For **NAIPv2**, normalized scores above **0.46**, **0.61**, and **0.79** correspond to the statistical means of NeurIPS rejected, Poster, and Oral papers, respectively.
|
|
|
|
| 294 |
""")
|
| 295 |
|
| 296 |
title_input.change(
|