First_agent

Sleeping

File size: 7,031 Bytes

from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool

from Gradio_UI import GradioUI


@tool
def get_top_daily_paper() -> str:
    """
    Retrieves the paper currently listed first on the Hugging Face Daily Papers page.

    The page https://huggingface.co/papers is downloaded and its first
    article element is taken as the top paper; the title and link are read
    from the "h3 a" element inside that article.

    Returns:
        str: "Top Daily Paper: <title> — <absolute URL>" on success, otherwise
        a message starting with "🔍 Error" describing what went wrong.
    """
    try:
        # Local imports, as in the original: these are only needed when the
        # tool actually runs.
        import requests
        from bs4 import BeautifulSoup
        from urllib.parse import urljoin

        url = "https://huggingface.co/papers"
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.content, "html.parser")

        # The first <article> on the page is treated as the top paper.
        paper = soup.select_one("article")
        if not paper:
            return "🔍 Error: No papers found on the page"

        # Title text and link both come from the anchor inside the <h3>.
        title_element = paper.select_one("h3 a")
        if not title_element:
            return "🔍 Error: Could not find paper title"

        title = title_element.get_text(strip=True)
        link = title_element.get("href")
        if not link:
            return "🔍 Error: Could not find paper link"

        # Resolve the href against the page URL instead of concatenating
        # strings: "/papers/123" still maps to the site root, while an href
        # that is already absolute (or lacks a leading slash) no longer
        # produces a malformed URL.
        full_url = urljoin(url, link)
        return f"Top Daily Paper: {title} — {full_url}"
    except Exception as e:
        # Tool boundary: any failure (network, HTTP status, parsing) is
        # reported back to the agent as text rather than raised.
        return f"🔍 Error fetching top paper: {str(e)}"

@tool
def get_paper_abstract(paper_url: str) -> str:
    """
    Retrieves the abstract from a Hugging Face paper page.

    The page is expected to contain an h2 heading "Abstract" immediately
    followed by a sibling div that holds the abstract paragraph and,
    optionally, an AI-generated summary box.

    Args:
        paper_url: The URL of the paper page

    Returns:
        str: The AI summary (when present) and the abstract text, separated by
        a blank line, or a message starting with "🔍 Error" on failure.
    """
    try:
        # Local imports, as in the original: these are only needed when the
        # tool actually runs.
        import requests
        from bs4 import BeautifulSoup

        resp = requests.get(paper_url, timeout=10)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.content, "html.parser")

        # Compare the heading's stripped text rather than its exact string so
        # that surrounding whitespace or inline markup inside the <h2> does
        # not hide the section.
        abstract_section = soup.find(
            lambda tag: tag.name == "h2" and tag.get_text(strip=True) == "Abstract"
        )
        if not abstract_section:
            return "🔍 Error: Abstract section not found"

        # The abstract body lives in the <div> that follows the heading.
        abstract_container = abstract_section.find_next_sibling("div")
        if not abstract_container:
            return "🔍 Error: Abstract content not found"

        result_parts = []

        # AI-generated summary box; the "/" in the CSS class name must be
        # escaped for the selector.
        ai_summary = abstract_container.select_one(".bg-blue-500\\/6 p")
        if ai_summary:
            summary_text = ai_summary.get_text(strip=True)
            if summary_text:
                result_parts.append(f"🤖 AI Summary: {summary_text}")

        # Main abstract paragraph.
        main_abstract = abstract_container.select_one("p.text-gray-600")
        if main_abstract:
            # get_text() collects only real text nodes (HTML comments are
            # skipped) and puts a space between fragments so words around
            # inline links do not fuse; split/join then collapses any
            # repeated whitespace.
            abstract_text = " ".join(main_abstract.get_text(" ", strip=True).split())
            if abstract_text:
                result_parts.append(f"📄 Abstract: {abstract_text}")

        if not result_parts:
            return "🔍 Error: No abstract content found"

        return "\n\n".join(result_parts)

    except Exception as e:
        # Tool boundary: any failure (bad URL, network, HTTP status, parsing)
        # is reported back to the agent as text rather than raised.
        return f"🔍 Error fetching abstract: {str(e)}"

@tool
def summarize_text(text: str, max_sentences: int = 3, model_name: str = "google/pegasus-cnn_dailymail") -> str:
    """
    Summarize a body of text using a Hugging Face Transformers pipeline.

    The input is split on whitespace into chunks of at most 1024 characters.
    Each chunk is summarized; when there is more than one chunk, the joined
    chunk summaries are summarized once more to produce the final result.

    Args:
        text: The text to be summarized.
        max_sentences: Approximate upper limit for the number of sentences in the output.
        model_name: The Hugging Face model to use for summarization (default is a CNN/DailyMail–fine‑tuned Pegasus).

    Returns:
        A concise summary string, or an error message.
    """
    try:
        # Reject empty input up front; otherwise no chunk is produced and the
        # failure would surface as an unhelpful "list index out of range".
        if not text or not text.strip():
            return "Error during summarization: input text is empty"

        import textwrap
        from transformers import pipeline

        # Length bounds passed to the model, scaled from the requested
        # sentence count. Clamp to at least one sentence so the bounds are
        # never zero or negative.
        sentences = max(1, max_sentences)
        max_length = sentences * 20
        min_length = sentences * 10

        # The pipeline is built on every call; nothing is cached between calls.
        summarizer = pipeline("summarization", model=model_name)

        def _summarize(piece: str) -> str:
            # truncation=True keeps an over-long input (notably the joined
            # chunk summaries in the second pass) within the model's input
            # limit instead of failing.
            out = summarizer(piece,
                             max_length=max_length,
                             min_length=min_length,
                             do_sample=False,
                             truncation=True)
            return out[0]['summary_text']

        # Chunk size is measured in characters, not tokens. textwrap breaks
        # on whitespace, so words are not cut in half unless a single word
        # is longer than the chunk size.
        max_chunk = 1024
        chunks = textwrap.wrap(text, width=max_chunk)

        summaries = [_summarize(chunk) for chunk in chunks]

        if len(summaries) == 1:
            return summaries[0]

        # Several chunks: condense the chunk-level summaries into one.
        return _summarize(" ".join(summaries))

    except Exception as e:
        # Tool boundary: model-loading or inference failures are reported
        # back to the agent as text rather than raised.
        return f"Error during summarization: {e}"

@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that reports the current local time for a given timezone.

    Args:
        timezone: A timezone name accepted by pytz.timezone (e.g., 'America/New_York').
    """
    try:
        zone = pytz.timezone(timezone)
        now_in_zone = datetime.datetime.now(zone)
        stamp = now_in_zone.strftime("%Y-%m-%d %H:%M:%S")
    except Exception as e:
        # Unknown timezone names (or any other failure) come back as text.
        return f"Error fetching time for timezone '{timezone}': {e}"
    else:
        return f"The current local time in {timezone} is: {stamp}"


# Tool the agent uses to return its final answer; it must stay in the tools
# list passed to CodeAgent below.
final_answer = FinalAnswerTool()

# If the agent does not answer, the model is probably overloaded. Use another
# model, or the following Hugging Face Endpoint, which also serves Qwen2.5 Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'

model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # it is possible that this model may be overloaded
    custom_role_conversions=None,
)


# Import tool from Hub.
# NOTE(review): this tool is loaded but not included in the agent's tools
# list below. trust_remote_code=True presumably permits running code from the
# Hub repository — confirm the repository is trusted.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Read the prompt templates with an explicit encoding so the result does not
# depend on the platform's default locale encoding.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    tools=[
        final_answer,
        get_current_time_in_timezone,
        get_top_daily_paper,
        get_paper_abstract,
        summarize_text,
    ],  # add your tools here (don't remove final answer)
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)


# Start the Gradio web UI for the agent (runs at import time, as before).
GradioUI(agent).launch()