| import gradio as gr |
| import requests |
| from cachetools import cached, TTLCache |
| from httpx import Client |
| import json |
| from pathlib import Path |
| from huggingface_hub import CommitScheduler |
| from dotenv import load_dotenv |
| import os |
| from typing import Tuple |
|
|
# Load variables from a local .env file (if any) into the process environment
# before anything below reads os.environ.
load_dotenv()

# Token for authenticated Hugging Face API calls. Indexing os.environ directly
# means a missing variable raises KeyError as soon as the module is imported.
HF_TOKEN = os.environ["HF_TOKEN"]

# Time-to-live, in seconds, used by the TTL caches in this module (6 hours).
CACHE_TIME = 6 * 60 * 60

# NOTE(review): this httpx client is not referenced by any code visible in
# this file; kept in case other modules import it.
client = Client()

# Dataset repository that receives the logged comments.
REPO_ID = "librarian-bots/paper-recommendations-v2"

# Background scheduler that pushes the local "comments" folder to the "data"
# directory of the dataset repo. Per the huggingface_hub documentation,
# `every` is expressed in minutes.
scheduler = CommitScheduler(
    repo_id=REPO_ID,
    repo_type="dataset",
    folder_path="comments",
    path_in_repo="data",
    every=5,
    token=HF_TOKEN,
)
|
|
|
|
def parse_arxiv_id_from_paper_url(url):
    """Extract the paper ID (last path segment) from a Hugging Face Papers URL.

    Surrounding whitespace, a query string, a fragment and trailing slashes
    are ignored, so ``https://huggingface.co/papers/2309.12307/?x=1`` yields
    ``"2309.12307"``. A bare ID is returned unchanged.

    Args:
        url: A Hugging Face Papers URL (or a bare paper ID).

    Returns:
        The final non-empty path segment of the URL.
    """
    # Cut the fragment and query first; otherwise they would end up inside
    # the returned ID.
    path = url.strip().partition("#")[0].partition("?")[0]
    # A trailing slash would make the last segment empty.
    return path.rstrip("/").split("/")[-1]
|
|
|
|
@cached(cache=TTLCache(maxsize=500, ttl=CACHE_TIME))
def get_recommendations_from_semantic_scholar(semantic_scholar_id: str):
    """Fetch recommended papers for one paper from the Semantic Scholar API.

    Successful results are memoised per ``semantic_scholar_id`` in a TTL cache
    (up to 500 entries, ``CACHE_TIME`` seconds each).

    Args:
        semantic_scholar_id: Paper identifier understood by Semantic Scholar,
            e.g. ``"ArXiv:2309.12307"``.

    Returns:
        The ``recommendedPapers`` list from the API response; each entry was
        requested with the ``externalIds``, ``title`` and ``year`` fields.

    Raises:
        gr.Error: If the request fails, the response is not valid JSON, or the
            response has no ``recommendedPapers`` key (typical for papers that
            Semantic Scholar has not indexed yet).
    """
    try:
        r = requests.post(
            "https://api.semanticscholar.org/recommendations/v1/papers/",
            json={
                "positivePaperIds": [semantic_scholar_id],
            },
            params={"fields": "externalIds,title,year", "limit": 14},
            # Without a timeout a stalled connection would block the request
            # handler indefinitely.
            timeout=30,
        )
        return r.json()["recommendedPapers"]
    except (KeyError, ValueError, requests.exceptions.RequestException) as e:
        # ValueError covers JSON decoding failures; RequestException covers
        # connection errors and timeouts. All surface as a UI-level error.
        raise gr.Error(
            "Error getting recommendations, if this is a new paper it may not yet have"
            " been indexed by Semantic Scholar."
        ) from e
|
|
|
|
def filter_recommendations(recommendations, max_paper_count=7):
    """Keep only recommendations that have an arXiv ID, capped at a maximum.

    Args:
        recommendations: Iterable of recommendation dicts; each may carry an
            ``externalIds`` mapping with an ``ArXiv`` entry.
        max_paper_count: Maximum number of papers to return.

    Returns:
        A new list with the first ``max_paper_count`` recommendations whose
        ``externalIds["ArXiv"]`` is present and not ``None``, in input order.
    """
    arxiv_papers = [
        r
        for r in recommendations
        # Tolerate a missing or null "externalIds" instead of crashing on it.
        if (r.get("externalIds") or {}).get("ArXiv") is not None
    ]
    # Slicing already handles lists shorter than the cap.
    return arxiv_papers[:max_paper_count]
|
|
|
|
@cached(cache=TTLCache(maxsize=500, ttl=CACHE_TIME))
def get_paper_title_from_arxiv_id(arxiv_id):
    """Look up a paper's title through the Hugging Face Papers API.

    Successful lookups are memoised per ``arxiv_id`` in a TTL cache (up to 500
    entries, ``CACHE_TIME`` seconds each).

    Args:
        arxiv_id: Paper ID as used in ``https://huggingface.co/api/papers/<id>``.

    Returns:
        The ``title`` field of the API response.

    Raises:
        gr.Error: If the request fails or the response has no ``title``.
    """
    try:
        response = requests.get(
            f"https://huggingface.co/api/papers/{arxiv_id}",
            # Avoid hanging forever on a stalled connection.
            timeout=30,
        )
        return response.json()["title"]
    except Exception as e:
        # Build the message once so the log line and the UI error agree; the
        # UI message previously lacked the f-prefix and showed raw braces.
        message = f"Error getting paper title for {arxiv_id}: {e}"
        print(message)
        raise gr.Error(message) from e
|
|
|
|
def format_recommendation_into_markdown(arxiv_id, recommendations):
    """Render recommendations as a markdown bullet list.

    Args:
        arxiv_id: ID of the paper the recommendations are for. Not used when
            building the text; kept for interface compatibility.
        recommendations: Iterable of dicts providing ``externalIds["ArXiv"]``,
            ``title`` and ``year``.

    Returns:
        A header line followed by one ``* [title](hub url) (year)`` bullet per
        recommendation, each terminated by a newline.
    """

    def _bullet(r):
        # One markdown list item linking to the paper's Hugging Face page.
        hub_paper_url = f"https://huggingface.co/papers/{r['externalIds']['ArXiv']}"
        return f"* [{r['title']}]({hub_paper_url}) ({r['year']})\n"

    header = "The following papers were recommended by the Semantic Scholar API \n\n"
    return header + "".join(map(_bullet, recommendations))
|
|
|
|
def format_comment(result: str):
    """Wrap a recommendation list in the Librarian Bot comment template.

    Args:
        result: Markdown text to embed between the bot's intro and footer.

    Returns:
        The intro sentence, ``result``, and three footer paragraphs joined
        into a single string.
    """
    pieces = [
        "This is an automated message from the [Librarian Bot](https://huggingface.co/librarian-bots). I found the following papers similar to this paper. \n\n",
        result,
        "\n\n Please give a thumbs up to this comment if you found it helpful!",
        "\n\n If you want recommendations for any Paper on Hugging Face checkout [this](https://huggingface.co/spaces/librarian-bots/recommend_similar_papers) Space",
        "\n\n You can directly ask Librarian Bot for paper recommendations by tagging it in a comment: `@librarian-bot recommend`",
    ]
    return "".join(pieces)
|
|
|
|
def post_comment(
    paper_url: str, comment: str, comment_id: str | None = None, token: str = HF_TOKEN
) -> Tuple[bool, str]:
    """
    Post a comment on a paper, or a reply to a comment, using the Hugging Face API.

    Args:
        paper_url (str): The URL of the paper to post the comment on.
        comment (str): The text of the comment or reply to post.
        comment_id (str, optional): The ID of the comment to reply to. If provided, the function posts a reply to that comment. Defaults to None.
        token (str, optional): The authentication token to use for the API request. Defaults to HF_TOKEN.

    Returns:
        Tuple[bool, str]: A tuple containing two elements:
            - bool: True if the API answered with status 201, False otherwise
              (including when the request itself failed).
            - str: The ID of the posted comment or reply when the response
              provides one, an empty string otherwise.

    Request errors (``requests.exceptions.RequestException``) are caught, logged
    and reported as ``(False, "")``; they are not propagated to the caller.
    """
    kind = "reply" if comment_id else "comment"
    try:
        paper_id = parse_arxiv_id_from_paper_url(paper_url)
        if comment_id:
            url = f"https://huggingface.co/api/papers/{paper_id}/comment/{comment_id}/reply"
            status_message = f"Replying to comment {comment_id}"
        else:
            url = f"https://huggingface.co/api/papers/{paper_id}/comment"
            status_message = f"Posting comment for {paper_url}"
        print(status_message)
        gr.Info(status_message)

        response = requests.post(
            url,
            json={"comment": comment},
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json",
            },
            # Avoid hanging forever on a stalled connection.
            timeout=30,
        )

        if response.status_code != 201:
            print(
                f"Failed to post {kind} for {paper_url}. Status code: {response.status_code}"
            )
            print(f"Response text: {response.text}")
            return False, ""

        # The comment exists once the API answered 201; an unparsable body
        # must not turn that into a reported failure.
        try:
            posted_comment_id = response.json().get("id", "")
        except ValueError:
            posted_comment_id = ""

        if comment_id:
            print(
                f"Reply posted successfully to comment {comment_id} for {paper_url}. Reply ID: {posted_comment_id}"
            )
        else:
            print(
                f"Comment posted successfully for {paper_url}. Comment ID: {posted_comment_id}"
            )
        return True, posted_comment_id

    except requests.exceptions.RequestException as e:
        print(f"Error posting {kind} for {paper_url}: {e}")
        return False, ""
|
|
|
|
| |
| |
| |
| |
|
|
| |
| |
|
|
| |
| |
| |
| |
| |
| |
|
|
|
|
def check_if_lib_bot_comment_exists(paper_url: str) -> Tuple[bool, str]:
    """
    Check if a comment or reply from the librarian-bot exists for a given paper URL using the Hugging Face API.

    Args:
        paper_url (str): The URL of the paper to check for librarian-bot comments.

    Returns:
        Tuple[bool, str]: A tuple containing two elements:
            - bool: True if a comment or reply authored by "librarian-bot" is
              found, or if the check itself raised an error; False otherwise.
            - str: The ID of the matching top-level comment (for a matching
              reply, the ID of its parent comment), or an empty string.

    No exception is propagated: any error during the check is logged and
    reported as ``(True, "")``, so callers skip posting rather than risk a
    duplicate comment.
    """
    try:
        paper_id = parse_arxiv_id_from_paper_url(paper_url)
        url = f"https://huggingface.co/api/papers/{paper_id}/?field=comments"
        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        # Avoid hanging forever; a timeout lands in the except branch below.
        response = requests.get(url, headers=headers, timeout=30)

        if response.status_code != 200:
            # NOTE(review): a non-200 answer reports "no comment found",
            # whereas an exception reports "found" -- confirm this asymmetry
            # is intended.
            print(
                f"Failed to retrieve comments for {paper_url}. Status code: {response.status_code}"
            )
            return False, ""

        for comment in response.json().get("comments", []):
            authors = [comment.get("author", {}).get("name")]
            authors.extend(
                reply.get("author", {}).get("name")
                for reply in comment.get("replies", [])
            )
            if "librarian-bot" in authors:
                # Always report the top-level comment's ID; fall back to ""
                # so the declared str return type holds.
                return True, comment.get("id") or ""

        return False, ""
    except Exception as e:
        print(f"Error checking if comment exists for {paper_url}: {e}")
        return True, ""
|
|
|
|
def log_comments(paper_url: str, comment: str):
    """
    Record a posted comment as ``comments/<paper_id>.json`` unless that file already exists.

    Args:
        paper_url (str): The URL of the paper.
        comment (str): The comment to be logged.

    Returns:
        None
    """
    paper_id = parse_arxiv_id_from_paper_url(paper_url)
    file_path = Path(f"comments/{paper_id}.json")
    # Check and write under the scheduler's lock so that concurrent calls
    # cannot both pass the existence check.
    with scheduler.lock:
        if file_path.exists():
            return
        # Make sure the target folder is present before writing.
        file_path.parent.mkdir(parents=True, exist_ok=True)
        with open(file_path, "w", encoding="utf-8") as f:
            json.dump({"paper_url": paper_url, "comment": comment}, f)
|
|
|
|
def return_recommendations(
    url: str, comment_id: str = "", post_to_paper: bool = True
) -> str:
    """Find similar academic papers using Semantic Scholar's recommendation API.

    Given a Hugging Face Papers URL, this tool retrieves related papers that may be
    of interest to researchers exploring the same topic. Recommendations are based
    on Semantic Scholar's citation graph and content similarity.

    Args:
        url: A Hugging Face Papers URL (e.g., https://huggingface.co/papers/2309.12307)
        comment_id: Optional comment ID to reply to (for API usage only)
        post_to_paper: If True, posts recommendations as a comment on the paper page

    Returns:
        Markdown-formatted list of recommended papers with titles, links, and years
    """
    # Build the markdown answer first; it is returned on every path.
    arxiv_id = parse_arxiv_id_from_paper_url(url)
    recommendations = get_recommendations_from_semantic_scholar(f"ArXiv:{arxiv_id}")
    filtered_recommendations = filter_recommendations(recommendations)
    formatted_recommendation = format_recommendation_into_markdown(
        arxiv_id, filtered_recommendations
    )

    if not post_to_paper:
        return formatted_recommendation

    comment = format_comment(formatted_recommendation)

    # Never post twice on the same paper.
    existing_comments, existing_comment_id = check_if_lib_bot_comment_exists(url)
    if existing_comments:
        gr.Info(
            f"Librarian-bot already commented on this paper. Comment ID: {existing_comment_id}. No further action will be taken."
        )
        return formatted_recommendation

    # Reply to the given comment when an ID was supplied, otherwise start a
    # new top-level comment.
    if comment_id:
        comment_status, posted_comment_id = post_comment(
            url, comment, comment_id, token=HF_TOKEN
        )
        success_message = f"Posted reply to comment {posted_comment_id}"
    else:
        comment_status, posted_comment_id = post_comment(
            url, comment, token=HF_TOKEN
        )
        success_message = f"Posted new comment {posted_comment_id}"

    if comment_status:
        log_comments(url, comment)
        gr.Info(success_message)
    else:
        gr.Info("Failed to post comment")

    return formatted_recommendation
|
|
|
|
# Text shown at the top of the Gradio page.
title = "Semantic Scholar Paper Recommender"
description = (
    "Paste a link to a paper on Hugging Face Papers and get recommendations for similar"
    " papers from Semantic Scholar. **Note**: Some papers may not have recommendations"
    " yet if they are new or have not been indexed by Semantic Scholar."
)

# Example rows are (url, comment_id, post_to_paper); posting is switched off
# so that running an example never comments on a paper.
examples = [
    ["https://huggingface.co/papers/2309.12307", None, False],
    ["https://huggingface.co/papers/2211.10086", None, False],
]

# Input components map positionally onto return_recommendations' parameters.
interface = gr.Interface(
    fn=return_recommendations,
    inputs=[
        gr.Textbox(lines=1),
        # Hidden in the UI; only reachable through the API.
        gr.Textbox(value="", lines=1, label="Comment ID (only for API)", visible=False),
        gr.Checkbox(value=False, label="Post recommendations to Paper page?"),
    ],
    outputs=gr.Markdown(),
    examples=examples,
    title=title,
    description=description,
)

# Enable request queuing, then start the app with the MCP server turned on.
interface.queue()
interface.launch(mcp_server=True)
|
|