inank committed on
Commit
fd4e7f3
·
verified ·
1 Parent(s): 6d3d769

feat: add new tools & enable file upload

Browse files

Added PDF extractor and a text analyze-and-summarize tool.
Enabled file upload by providing a temp folder to the app launcher.

Files changed (1) hide show
  1. tools/pdf_extractor.py +105 -20
tools/pdf_extractor.py CHANGED
@@ -1,30 +1,115 @@
1
- from smolagents import tool
2
- import PyPDF2
 
 
 
 
 
3
 
 
4
 
5
@tool
def extract_text_from_pdf(pdf_path: str) -> str:
    """Extracts all text content from a PDF file.

    Args:
        pdf_path: The file path to the PDF file to extract text from (e.g., '/tmp/document.pdf')

    Returns:
        The extracted text content from the PDF file
    """
    try:
        with open(pdf_path, 'rb') as handle:
            reader = PyPDF2.PdfReader(handle)
            # One labelled chunk per page: "--- Page N ---\n<page text>".
            page_chunks = [
                f"--- Page {index} ---\n{page.extract_text()}"
                for index, page in enumerate(reader.pages, start=1)
            ]
        return "\n\n".join(page_chunks)
    except FileNotFoundError:
        return f"Error: PDF file not found at path: {pdf_path}"
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
2
+ import datetime
3
+ import requests
4
+ import pytz
5
+ import yaml
6
+ from tools.final_answer import FinalAnswerTool
7
+ from tools.pdf_extractor import extract_text_from_pdf
8
 
9
+ from Gradio_UI import GradioUI
10
 
11
  @tool
12
def summarize_and_analyze_text(text: str, max_sentences: int = 5) -> str:
    """Analyzes and summarizes text content, extracting key information and main ideas.

    This tool intelligently condenses lengthy text into concise summaries while preserving
    the most important information. Perfect for processing search results, PDFs, and documents.

    Args:
        text: The text content to summarize and analyze
        max_sentences: Maximum number of sentences in the summary (default: 5)

    Returns:
        A formatted summary containing key points and main ideas from the text
    """
    # Hoisted from mid-function: imports belong at the top of the scope.
    import re
    import string

    def _normalize(word: str) -> str:
        # Strip surrounding punctuation so "data." and "data" count as one word;
        # previously punctuation skewed frequencies, scores, and key terms.
        return word.lower().strip(string.punctuation)

    try:
        # Remove extra whitespace and normalize text
        text = " ".join(text.split())

        if len(text) < 100:
            return f"Text is too short to summarize. Original text:\n{text}"

        # Guard against non-positive limits so the summary is never empty.
        max_sentences = max(1, max_sentences)

        # Split into sentences (simple punctuation-based approach).
        sentences = [s.strip() for s in re.split(r'(?<=[.!?])\s+', text) if s.strip()]

        # Score words by frequency, ignoring short (<= 3 char) filler words.
        word_freq = {}
        for raw in text.split():
            word = _normalize(raw)
            if len(word) > 3:
                word_freq[word] = word_freq.get(word, 0) + 1

        # Score each sentence as the sum of its words' frequencies.
        sentence_scores = []
        for i, sent in enumerate(sentences):
            score = sum(word_freq.get(_normalize(w), 0) for w in sent.split())
            sentence_scores.append((i, score, sent))

        # Pick the highest-scoring sentences (stable sort keeps original order
        # on ties) but emit them in document order. Set gives O(1) membership.
        top_indices = {
            idx for idx, _, _ in sorted(sentence_scores, key=lambda x: -x[1])[:max_sentences]
        }
        summary_sentences = [sent for idx, _, sent in sentence_scores if idx in top_indices]

        summary = " ".join(summary_sentences)

        # Key terms = the most frequent normalized words (ties keep first-seen order).
        sorted_words = sorted(word_freq.items(), key=lambda x: -x[1])
        key_terms = ", ".join([word for word, _ in sorted_words[:5]])

        return f"""📋 SUMMARY:\n{summary}\n\n🔑 KEY TERMS: {key_terms}\n\n📊 ANALYSIS:\n- Text length: {len(text)} characters\n- Total sentences: {len(sentences)}\n- Summary length: {len(summary_sentences)} sentences"""
    except Exception as e:
        return f"Error analyzing text: {str(e)}"
66
+
67
+ @tool
68
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        # stdlib zoneinfo (Python 3.9+) replaces third-party pytz; an unknown
        # zone raises ZoneInfoNotFoundError, caught by the handler below just
        # as pytz.UnknownTimeZoneError was.
        from zoneinfo import ZoneInfo

        tz = ZoneInfo(timezone)
        # Get current time in that timezone
        local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {local_time}"
    except Exception as e:
        return f"Error fetching time for timezone '{timezone}': {str(e)}"
81
+
82
+
83
# ---------------------------------------------------------------------------
# Module-level agent wiring: build the model, assemble the tools, and launch
# the Gradio app. Note this runs on import — there is no __main__ guard.
# ---------------------------------------------------------------------------

final_answer = FinalAnswerTool()

# If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'

model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # it is possible that this model may be overloaded
    custom_role_conversions=None,
)


# Import tool from Hub.
# NOTE(review): trust_remote_code=True executes code downloaded from the Hub;
# acceptable only because the tool repo is the known agents-course example.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Load the agent's prompt templates from the sibling prompts.yaml file.
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    tools=[image_generation_tool,get_current_time_in_timezone,extract_text_from_pdf,summarize_and_analyze_text,final_answer], ## add your tools here (don't remove final answer)
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)


# "/tmp" enables file upload per this commit's message ("providing a temp
# folder to the app launcher"). NOTE(review): presumably GradioUI's second
# positional arg is the upload folder — confirm against Gradio_UI.GradioUI.
GradioUI(agent, "/tmp").launch()