Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -57,15 +57,42 @@ class PaperScorer:
|
|
| 57 |
self.prompt_template = (
|
| 58 |
"Given a certain paper, Title: {title}\nAbstract: {abstract}\nEvaluate the quality of this paper:"
|
| 59 |
)
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
def score(self, title: str, abstract: str) -> float:
|
| 62 |
prompt = self.prompt_template.format(title=title.strip(), abstract=abstract.strip())
|
| 63 |
-
inputs = self.tokenizer(
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
with torch.no_grad():
|
| 66 |
-
logits = self.model(**inputs).logits
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
| 69 |
|
| 70 |
@spaces.GPU(duration=60, enable_queue=True)
|
| 71 |
def predict_v2(title, abstract):
|
|
@@ -187,8 +214,15 @@ with gr.Blocks() as iface:
|
|
| 187 |
- It is intended as a tool **for research and educational purposes only**.
|
| 188 |
- Please refrain from deliberately embellishing the title and abstract to boost scores, and **avoid making false claims**.
|
| 189 |
- The **predicted value** is a probability generated by the model and **does NOT reflect the paper's true quality or novelty**.
|
| 190 |
-
- Empirically, it is considered that a predicted influence score greater than 0.60 in v1 indicates a potentially impactful paper, while a score greater than 0.5 in v2 indicates a high-quality paper.
|
| 191 |
- The **author takes NO responsibility** for the prediction results.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
""")
|
| 193 |
|
| 194 |
title_input.change(
|
|
|
|
| 57 |
self.prompt_template = (
|
| 58 |
"Given a certain paper, Title: {title}\nAbstract: {abstract}\nEvaluate the quality of this paper:"
|
| 59 |
)
|
| 60 |
+
def _rescale_score(self, s: float) -> float:
|
| 61 |
+
"""
|
| 62 |
+
Piecewise linear rescaling:
|
| 63 |
+
0.0 ~ 0.2 -> 0.0 ~ 0.5
|
| 64 |
+
0.2 ~ 0.6 -> 0.5 ~ 1.0
|
| 65 |
+
>0.6 -> 1.0
|
| 66 |
+
"""
|
| 67 |
+
s = max(0.0, min(1.0, s)) # clamp input to [0,1]
|
| 68 |
+
|
| 69 |
+
if s <= 0.2:
|
| 70 |
+
# scale [0,0.2] to [0,0.5]
|
| 71 |
+
y = (s / 0.2) * 0.5
|
| 72 |
+
elif s <= 0.6:
|
| 73 |
+
# scale (0.2,0.6] to (0.5,1.0]
|
| 74 |
+
y = 0.5 + ((s - 0.2) / 0.4) * 0.5
|
| 75 |
+
else:
|
| 76 |
+
# everything above 0.6 maps to 1.0
|
| 77 |
+
y = 1.0
|
| 78 |
+
|
| 79 |
+
return round(y, 4)
|
| 80 |
def score(self, title: str, abstract: str) -> float:
    """Score a paper from its title and abstract.

    Fills the prompt template, tokenizes it, runs the model without
    gradient tracking, squashes the logit through a sigmoid, and
    rescales the result via ``_rescale_score``.

    NOTE(review): assumes the model emits a single logit per input
    (``.view(-1).item()`` would raise otherwise) — confirm against
    the model config.
    """
    text = self.prompt_template.format(
        title=title.strip(), abstract=abstract.strip()
    )
    encoded = self.tokenizer(
        text,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=self.max_length,
    ).to(self.device)

    with torch.no_grad():
        raw_logits = self.model(**encoded).logits
        probability = torch.sigmoid(raw_logits).view(-1).item()

    # Map the raw probability onto the calibrated 0-1 scale.
    return round(self._rescale_score(probability), 4)
|
| 96 |
|
| 97 |
@spaces.GPU(duration=60, enable_queue=True)
|
| 98 |
def predict_v2(title, abstract):
|
|
|
|
| 214 |
- It is intended as a tool **for research and educational purposes only**.
|
| 215 |
- Please refrain from deliberately embellishing the title and abstract to boost scores, and **avoid making false claims**.
|
| 216 |
- The **predicted value** is a probability generated by the model and **does NOT reflect the paper's true quality or novelty**.
|
|
|
|
| 217 |
- The **author takes NO responsibility** for the prediction results.
|
| 218 |
+
- This demo explores LLMs for paper quality assessment, but is **not optimized against prompt injection attacks**.
|
| 219 |
+
- For **NAIPv1**, a predicted influence score greater than 0.60 is considered to indicate a potentially impactful paper.
|
| 220 |
+
- For **NAIPv2**, a predicted influence score greater than 0.50 is considered to indicate a potentially high-quality paper.
|
| 221 |
+
*Note*: Since NAIPv2 was trained without absolute scale supervision, the 0–1 scores are rescaled.
|
| 222 |
+
- A score of 0.50 indicates quality above the statistical average of AI papers.
|
| 223 |
+
- A score above 0.65 is considered to reach the NeurIPS poster level.
|
| 224 |
+
- ~0.75 corresponds to the average NeurIPS spotlight level.
|
| 225 |
+
- ~0.85 corresponds to the average NeurIPS oral level.
|
| 226 |
""")
|
| 227 |
|
| 228 |
title_input.change(
|