Spaces:
Sleeping
Sleeping
| from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool | |
| import datetime | |
| import requests | |
| import pytz | |
| import yaml | |
| from tools.final_answer import FinalAnswerTool | |
| from Gradio_UI import GradioUI | |
def get_top_daily_paper() -> str:
    """
    Retrieves the paper listed first on the Hugging Face Daily Papers page.

    Returns:
        str: The title and URL of the paper, or a message starting with "🔍 Error" on failure.
    """
    # Every failure (missing dependency, network error, unexpected page layout)
    # is reported as a string rather than raised, so the caller always gets text back.
    try:
        # Imports stay inside the function, as in the original, so a missing
        # package is reported through the error string below.
        from urllib.parse import urljoin

        import requests
        from bs4 import BeautifulSoup

        url = "https://huggingface.co/papers"
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.content, "html.parser")

        # The first <article> on the page is treated as the top paper.
        # NOTE(review): this assumes the page orders papers by upvotes — confirm.
        paper = soup.select_one("article")
        if not paper:
            return "🔍 Error: No papers found on the page"

        # The title and its link are expected in an h3 > a structure.
        title_element = paper.select_one("h3 a")
        if not title_element:
            return "🔍 Error: Could not find paper title"

        title = title_element.get_text(strip=True)
        link = title_element.get("href")
        if not link:
            return "🔍 Error: Could not find paper link"

        # urljoin handles root-relative ("/papers/..."), relative and already
        # absolute hrefs; plain string concatenation only worked for the first.
        full_url = urljoin(url, link)
        return f"Top Daily Paper: {title} — {full_url}"
    except Exception as e:
        return f"🔍 Error fetching top paper: {str(e)}"
def get_paper_abstract(paper_url: str) -> str:
    """
    Retrieves the abstract from a Hugging Face paper page.

    Args:
        paper_url: The URL of the paper page

    Returns:
        str: The paper abstract including AI summary if available, or a message starting with "🔍 Error" on failure
    """
    try:
        import requests
        from bs4 import BeautifulSoup

        response = requests.get(paper_url, timeout=10)
        response.raise_for_status()
        page = BeautifulSoup(response.content, "html.parser")

        # Locate the "Abstract" heading, then the <div> sibling that follows it.
        heading = page.find("h2", string="Abstract")
        if heading is None:
            return "🔍 Error: Abstract section not found"

        container = heading.find_next_sibling("div")
        if container is None:
            return "🔍 Error: Abstract content not found"

        sections = []

        # Optional AI-generated summary (the blue box on the page).
        summary_node = container.select_one(".bg-blue-500\\/6 p")
        if summary_node is not None:
            sections.append(f"🤖 AI Summary: {summary_node.get_text(strip=True)}")

        # Main abstract paragraph: keep only the text nodes (this drops link
        # markup), then collapse every run of whitespace to a single space.
        abstract_node = container.select_one("p.text-gray-600")
        if abstract_node is not None:
            pieces = [node.strip() for node in abstract_node.descendants if node.name is None]
            normalized = " ".join(" ".join(pieces).split())
            sections.append(f"📄 Abstract: {normalized}")

        if not sections:
            return "🔍 Error: No abstract content found"
        return "\n\n".join(sections)
    except Exception as err:
        return f"🔍 Error fetching abstract: {str(err)}"
def summarize_text(text: str, max_sentences: int = 3, model_name: str = "google/pegasus-cnn_dailymail") -> str:
    """
    Summarize a body of text using a Hugging Face Transformers pipeline.

    Args:
        text: The text to be summarized.
        max_sentences: Approximate upper limit for the number of sentences in the output.
        model_name: The Hugging Face model to use for summarization (default is a CNN/DailyMail fine-tuned Pegasus).

    Returns:
        A concise summary string, or an error message starting with "Error during summarization:".
    """
    try:
        # Validate before importing transformers or loading a model, so bad
        # input gets a clear message instead of an expensive load followed by
        # an opaque failure.
        if not text or not text.strip():
            return "Error during summarization: input text is empty"
        if max_sentences < 1:
            return "Error during summarization: max_sentences must be at least 1"

        import textwrap

        from transformers import pipeline

        # NOTE: the pipeline is built on every call; nothing is cached between calls.
        summarizer = pipeline("summarization", model=model_name)

        # Chunk size is measured in characters, not tokens. textwrap.wrap breaks
        # on whitespace, so words are not cut in half at chunk boundaries the
        # way fixed-offset slicing would cut them.
        max_chunk_chars = 1024
        chunks = textwrap.wrap(text, width=max_chunk_chars)

        # Output length bounds passed to the pipeline, scaled from max_sentences
        # with the original 10x / 20x heuristic.
        generation_kwargs = {
            "max_length": max_sentences * 20,
            "min_length": max_sentences * 10,
            "do_sample": False,
        }

        # Summarize each chunk independently.
        summaries = [summarizer(chunk, **generation_kwargs)[0]['summary_text'] for chunk in chunks]

        if len(summaries) == 1:
            return summaries[0]

        # Several chunks: summarize the concatenated partial summaries once more.
        combined = " ".join(summaries)
        return summarizer(combined, **generation_kwargs)[0]['summary_text']
    except Exception as e:
        return f"Error during summarization: {e}"
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.

    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        # Resolve the zone name, read the clock in that zone, then format it.
        zone = pytz.timezone(timezone)
        now_in_zone = datetime.datetime.now(zone)
        local_time = now_in_zone.strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {local_time}"
    except Exception as err:
        # Any failure (e.g. an unknown zone name) is reported as text, not raised.
        return f"Error fetching time for timezone '{timezone}': {str(err)}"
from smolagents import Tool  # used only by _ensure_tool below

# Tool instance listed first in the agent's tools; do not remove it from that list.
final_answer = FinalAnswerTool()

# If the agent does not answer, the model is probably overloaded. Use another
# model, or the following Hugging Face endpoint, which also serves Qwen2.5 Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # this model may be overloaded at times
    custom_role_conversions=None,
)

# Import tool from Hub.
# NOTE: trust_remote_code=True executes code downloaded from the Hub repository.
# NOTE: image_generation_tool is loaded but is not included in the agent's tools below.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Explicit encoding so the prompt templates load the same way regardless of
# the platform's default locale encoding.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)


def _ensure_tool(candidate):
    """Return candidate unchanged if it is already a Tool, otherwise wrap it with smolagents' `tool`."""
    # The functions in this file are plain callables unless they were decorated
    # with @tool; wrapping here keeps the agent working in both cases.
    # NOTE(review): the agent is expected to require Tool instances — confirm
    # against the installed smolagents version.
    if isinstance(candidate, Tool):
        return candidate
    return tool(candidate)


agent = CodeAgent(
    model=model,
    tools=[
        final_answer,  # don't remove final answer
        _ensure_tool(get_current_time_in_timezone),
        _ensure_tool(get_top_daily_paper),
        _ensure_tool(get_paper_abstract),
        _ensure_tool(summarize_text),
    ],  # add your tools here
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

GradioUI(agent).launch()