Spaces:

ssocean
/

Newborn_Article_Impact_Predict

Running on Zero

App Files Community

ssocean committed on Oct 1, 2025

Commit

ad8a799

verified ·

1 Parent(s): 47e27e9

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -25

app.py CHANGED Viewed

@@ -77,26 +77,59 @@ class PaperScorer:
         self.prompt_template = (
             "Given a certain paper, Title: {title}\nAbstract: {abstract}\nEvaluate the quality of this paper:"
         )
     def _rescale_score(self, s: float) -> float:
         """
-        Piecewise linear rescaling:
-          0.0 ~ 0.2  ->  0.0 ~ 0.5
-          0.2 ~ 0.6  ->  0.5 ~ 1.0
-          >0.6       ->  1.0
         """
-        s = max(0.0, min(1.0, s))  # clamp input to [0,1]
-        if s <= 0.2:
-            # scale [0,0.2] to [0,0.5]
-            y = (s / 0.2) * 0.5
-        elif s <= 0.6:
-            # scale (0.2,0.6] to (0.5,1.0]
-            y = 0.5 + ((s - 0.2) / 0.4) * 0.5
-        else:
-            # everything above 0.6 maps to 1.0
-            y = 1.0
-        return round(y, 4)
     def score(self, title: str, abstract: str) -> float:
         prompt = self.prompt_template.format(title=title.strip(), abstract=abstract.strip())
         inputs = self.tokenizer(
@@ -134,12 +167,12 @@ def predict(title, abstract, model_version):
     if model_version == "v1":
         raw, final = predict_v1(title, abstract)
-        print(f'Raw: {raw}, Final: {final}')
         print(f'-------------------------------------------------------------------------------\n\n')
         return [["Raw Score", round(raw, 4)], ["Final Score", round(final, 4)]]
     else:
         raw, final = predict_v2(title, abstract)
-        print(f'Raw: {raw}, Final: {final}')
         print(f'-------------------------------------------------------------------------------\n\n')
         return [["Raw Score", round(raw, 4)], ["Final Score", round(final, 4)]]
@@ -205,7 +238,7 @@ examples = [
 # ===== Gradio interface =====
 with gr.Blocks() as iface:
     gr.Markdown("""
-    # 📈 Predict Impact & Quality of Newly Published Papers
     ### LLM-powered estimates from a paper’s title and abstract.
@@ -214,8 +247,10 @@ with gr.Blocks() as iface:
     - [**NAIPv2**](https://arxiv.org/abs/2509.25179) — predicts **paper quality** *(default)*
     *See the papers for methodology and evaluation details.*
-    > ⚡ **Note:** Local inference is instantaneous. On **Hugging Face ZeroGPU**, the quantized model is reloaded **from disk** on each prediction, which can introduce minor disk-I/O delays (typically <30 s).
-    > Since **NAIPv2** was trained with *relative supervision*, its absolute scores are most reliable for papers **within the training distribution** (e.g., ICLR-style topics). For **out-of-distribution areas** like robotics or 3D vision, it’s better to focus on the **raw score** and compare it against a domain-specific baseline.
     """)
     with gr.Row():
@@ -248,14 +283,14 @@ with gr.Blocks() as iface:
                 label="Predicted Scores"
             )
             gr.Markdown("""
-            ## Ethical Warnings and Important Notes
             - It is intended as a tool **for research and educational purposes only**.
             - Please refrain from deliberately embellishing the title and abstract to boost scores, and **avoid making false claims**.
-            - This demo explores the use of LLMs for paper quality estimation and is not optimized against prompt injection attacks.
             - The **predicted value** is a probability generated by the model and **does NOT reflect the paper's true quality or novelty**.
-            - For **NAIPv1**, a predicted influence score greater than 0.60 is considered to indicate a potentially impactful paper.
-            - For **NAIPv2**, a predicted influence score greater than 0.50 is considered to indicate a potentially high-quality paper. *Note*: Since NAIPv2 was trained without absolute scale supervision, the 0–1 scores are rescaled by us; 0.50 indicates above the statistical average quality of AI papers; ≥0.65 is considered to reach the NeurIPS poster level; ~0.75 corresponds to the average spotlight level; ~0.85 corresponds to the average oral level.
-            - The **author takes NO responsibility** for the prediction results.
             """)
     title_input.change(

         self.prompt_template = (
             "Given a certain paper, Title: {title}\nAbstract: {abstract}\nEvaluate the quality of this paper:"
         )
+    # def _rescale_score(self, s: float) -> float:
+    #     """
+    #     Piecewise linear rescaling:
+    #       0.0 ~ 0.2  ->  0.0 ~ 0.5
+    #       0.2 ~ 0.6  ->  0.5 ~ 1.0
+    #       >0.6       ->  1.0
+    #     """
+    #     s = max(0.0, min(1.0, s))  # clamp input to [0,1]
+    #     if s <= 0.2:
+    #         # scale [0,0.2] to [0,0.5]
+    #         y = (s / 0.2) * 0.5
+    #     elif s <= 0.6:
+    #         # scale (0.2,0.6] to (0.5,1.0]
+    #         y = 0.5 + ((s - 0.2) / 0.4) * 0.5
+    #     else:
+    #         # everything above 0.6 maps to 1.0
+    #         y = 1.0
+    #     return round(y, 4)
     def _rescale_score(self, s: float) -> float:
         """
+        Piecewise linear rescaling with custom anchors.
+        Mapping examples:
+          0.233 -> 0.465
+          0.372 -> 0.608
+          0.423 -> 0.714
+          0.496 -> 0.786
+        Entire range still in [0,1].
         """
+        # clamp input
+        s = max(0.0, min(1.0, s))
+        # define anchors (x -> y)
+        anchors = [
+            (0.0,   0.0),
+            (0.233, 0.465),
+            (0.372, 0.608),
+            (0.423, 0.714),
+            (0.496, 0.786),
+            (1.0,   1.0),
+        ]
+        # find interval
+        for (x1, y1), (x2, y2) in zip(anchors, anchors[1:]):
+            if x1 <= s <= x2:
+                # linear interpolation
+                t = (s - x1) / (x2 - x1)
+                y = y1 + t * (y2 - y1)
+                return round(y, 4)
+        return 1.0
     def score(self, title: str, abstract: str) -> float:
         prompt = self.prompt_template.format(title=title.strip(), abstract=abstract.strip())
         inputs = self.tokenizer(
     if model_version == "v1":
         raw, final = predict_v1(title, abstract)
+        print(f'Raw Score: {raw}, Normalized Score: {final}')
         print(f'-------------------------------------------------------------------------------\n\n')
         return [["Raw Score", round(raw, 4)], ["Final Score", round(final, 4)]]
     else:
         raw, final = predict_v2(title, abstract)
+        print(f'Raw Score: {raw}, Normalized Score: {final}')
         print(f'-------------------------------------------------------------------------------\n\n')
         return [["Raw Score", round(raw, 4)], ["Final Score", round(final, 4)]]
 # ===== Gradio interface =====
 with gr.Blocks() as iface:
     gr.Markdown("""
+    # 📈 Predict Impact & Quality of Newborn Papers
     ### LLM-powered estimates from a paper’s title and abstract.
     - [**NAIPv2**](https://arxiv.org/abs/2509.25179) — predicts **paper quality** *(default)*
     *See the papers for methodology and evaluation details.*
+    > ⚡ **Note:** Local inference is instantaneous. On **Hugging Face ZeroGPU**, the quantized model is reloaded **from disk** on each prediction, which can introduce significant disk-I/O delays (typically <30 s).
+    > For **NAIPv2**, the output **Normalized score** may not be comparable across different domains.  It is recommended to use the **Raw score** magnitude for quality estimation within the same domain.
     """)
     with gr.Row():
                 label="Predicted Scores"
             )
             gr.Markdown("""
+            ## Important Notes
+            - The reported performance reflects aggregate statistical outcomes, rather than guarantees for individual instances.
             - It is intended as a tool **for research and educational purposes only**.
             - Please refrain from deliberately embellishing the title and abstract to boost scores, and **avoid making false claims**.
+            - This demo is an early exploration of using LLMs for paper quality estimation and is not optimized against prompt injection attacks.
             - The **predicted value** is a probability generated by the model and **does NOT reflect the paper's true quality or novelty**.
+            - For **NAIPv1**, a normalized score greater than **0.60** is considered to indicate a potentially impactful paper.
+            - For **NAIPv2**, normalized scores above **0.46**, **0.61**, and **0.79** correspond to the statistical means of NeurIPS rejected, Poster, and Oral papers, respectively.
             """)
     title_input.change(