agents-course-final-assignment

Runtime error

App Files Files Community

abtsousa committed on Aug 13, 2025

Commit

3adfe4f

1 Parent(s): f37e95b

Added wikipedia tool.

Browse files

Files changed (2) hide show

tools/__init__.py +6 -8
tools/wikipedia.py +143 -0

tools/__init__.py CHANGED Viewed

@@ -1,21 +1,19 @@
-from .wikipedia import wiki_search
-from langchain_core.tools import StructuredTool
 from langchain_community.document_loaders import WikipediaLoader
 from langchain_core.tools import render_text_description_and_args
 from langchain_core.tools import tool
-def get_all_tools() -> list[StructuredTool]:
     """
-    Get all available tools as a list of LangChain StructuredTool instances.
     Returns:
-        List of StructuredTool instances ready for use with LangChain agents
     """
     tools = []
-    # Add Wikipedia tool
-    wikipedia_tool = wiki_search
-    tools.append(wikipedia_tool)
     return tools

+from .wikipedia import fetch_wikipedia_content
+from langchain_core.tools import BaseTool
 from langchain_community.document_loaders import WikipediaLoader
 from langchain_core.tools import render_text_description_and_args
 from langchain_core.tools import tool
def get_all_tools() -> list[BaseTool]:
    """
    Collect every tool exposed by this package.

    Returns:
        A freshly built list of LangChain BaseTool instances, ready to be
        handed to a LangChain agent. Currently it holds only the Wikipedia
        tool.
    """
    # A new list is built on every call so callers may mutate the result.
    return [fetch_wikipedia_content]

tools/wikipedia.py ADDED Viewed

	@@ -0,0 +1,143 @@

+from langchain_core.tools import tool
+from langchain_community.document_loaders import WikipediaLoader
+from typing import Optional
@tool
def fetch_wikipedia_content(query: str, top_k: int = 2, max_chars_per_doc: int = 16000, specific_section: Optional[str] = None) -> dict[str, str]:
    """Wikipedia search with optional section-based content management.

    Args:
        query: The search query.
        top_k (Optional): The number of top results to return (default: 2).
        max_chars_per_doc (Optional): Maximum characters per document (default: 16000).
        specific_section (Optional): If provided, return only this specific section (use section name from sections list).

    Returns:
        A dict with a single key "wiki_results" holding every matching page,
        each wrapped in a <Document> element and separated by "---" lines.
    """
    # Load documents with full content so the section structure can be analysed
    # before any truncation happens.
    search_docs = WikipediaLoader(
        query=query,
        load_max_docs=top_k,
        doc_content_chars_max=128000  # Load full content to analyze sections
    ).load()

    formatted_docs = []
    for doc in search_docs:
        content = doc.page_content
        source = doc.metadata.get("source", "")
        # NOTE(review): WikipediaLoader documents appear to carry the page name
        # under "title" rather than "page" - confirm against the loader docs.
        # Falling back keeps the attribute from being permanently empty.
        page = doc.metadata.get("page") or doc.metadata.get("title", "")

        # The opening tag is a regular (not self-closing) tag so that it pairs
        # with the closing </Document> emitted at the end of each entry.
        if specific_section:
            # Return only the requested section
            section_content = _extract_specific_section(content, specific_section)
            if section_content:
                formatted_doc = f'<Document source="{source}" page="{page}" section="{specific_section}">\n{section_content}\n</Document>'
            else:
                # Tell the caller which names are valid so it can retry.
                formatted_doc = f'<Document source="{source}" page="{page}">\nSection "{specific_section}" not found. Available sections: {_get_section_names(content)}\n</Document>'
        else:
            # Process with section-based distribution
            processed_content, sections_info = _process_content_by_sections(content, max_chars_per_doc)
            formatted_doc = f'<Document source="{source}" page="{page}">\n{sections_info}\n\n{processed_content}\n</Document>'

        formatted_docs.append(formatted_doc)

    return {"wiki_results": "\n\n---\n\n".join(formatted_docs)}
+def _get_sections(content: str) -> list[tuple[str, str]]:
+    """Parse Wikipedia content into top-level sections only.
+    Returns:
+        List of (section_name, section_content) tuples
+    """
+    sections = []
+    lines = content.split('\n')
+    current_section = "Introduction"
+    current_content = []
+    for line in lines:
+        # Check if this is a TOP-LEVEL section header (== but not ===)
+        if line.strip().startswith('==') and not line.strip().startswith('==='):
+            # Save previous section
+            if current_content:
+                sections.append((current_section, '\n'.join(current_content)))
+            # Start new section
+            current_section = line.strip().replace('=', '').strip()
+            current_content = []
+        else:
+            current_content.append(line)
+    # Add the last section
+    if current_content:
+        sections.append((current_section, '\n'.join(current_content)))
+    return sections
def _get_section_names(content: str) -> str:
    """Return the top-level section names of *content*, joined by ", "."""
    return ", ".join(title for title, _body in _get_sections(content))
def _extract_specific_section(content: str, section_name: str) -> Optional[str]:
    """Extract a specific section from Wikipedia content.

    Matching ignores case and surrounding whitespace. An exact name match is
    preferred; otherwise the first section whose name contains the requested
    text is used.

    Args:
        content: Full page text to search.
        section_name: Name (or part of the name) of the wanted section.

    Returns:
        The body of the matching section, or None when nothing matches or the
        requested name is blank.
    """
    wanted = section_name.strip().casefold()
    if not wanted:
        # A blank name would otherwise "partially match" every section.
        return None

    # Normalise names once instead of on every comparison.
    candidates = [(name.casefold(), body) for name, body in _get_sections(content)]

    # Look for exact match first
    for name, body in candidates:
        if name == wanted:
            return body

    # Look for partial match
    for name, body in candidates:
        if wanted in name:
            return body

    return None
def _process_content_by_sections(content: str, max_chars: int) -> tuple[str, str]:
    """Process content by distributing characters equally across sections.

    Content that already fits within max_chars is returned untouched.
    Otherwise every top-level section receives an equal share of max_chars
    (the remainder is handed out one character at a time to the first
    sections) and is cut to that share. The budget applies to section bodies
    only; the re-added "== name ==" headings and separators are not counted.

    Args:
        content: Full page text.
        max_chars: Character budget for the section bodies.

    Returns:
        Tuple of (processed_content, sections_info)
    """
    sections = _get_sections(content)
    # Derive the name list from the sections already parsed rather than
    # parsing the content a second time.
    section_names = ", ".join(name for name, _ in sections)

    # If content is under limit, return as is
    if len(content) <= max_chars:
        return content, f"Available sections: {section_names}"

    # Defensive guard: avoids dividing by zero below.
    if not sections:
        return content[:max_chars] + "...", "No sections found, content truncated"

    # Equal share per section, plus a remainder to spread over the first ones.
    chars_per_section, reserve_chars = divmod(max_chars, len(sections))

    processed_sections = []
    for i, (section_name, section_content) in enumerate(sections):
        # Give some sections a few extra characters from the reserve
        section_limit = chars_per_section + (1 if i < reserve_chars else 0)

        if len(section_content) <= section_limit:
            truncated_content = section_content
        elif section_limit >= 3:
            truncated_content = section_content[:section_limit - 3] + "..."
        else:
            # No room for an ellipsis. Slicing with a negative end index would
            # keep almost the whole section, so clamp the cut at zero.
            truncated_content = section_content[:max(section_limit, 0)]

        processed_sections.append(f"== {section_name} ==\n{truncated_content}")

    sections_info = f"Content truncated due to {max_chars} char limit. Available sections: {section_names}. Use specific_section parameter to load any section in full."
    return "\n\n".join(processed_sections), sections_info
if __name__ == "__main__":
    # Example usage. A tool with several arguments must receive them as one
    # dict: extra keyword arguments given to invoke() are not forwarded to the
    # tool function, so top_k / max_chars_per_doc would silently fall back to
    # their defaults.
    query = "Albert Einstein"
    result = fetch_wikipedia_content.invoke({"query": query, "top_k": 1, "max_chars_per_doc": 16000})
    print(result['wiki_results'])

    # Very small page example
    query = "Nico Ditch"
    result = fetch_wikipedia_content.invoke({"query": query, "top_k": 1, "max_chars_per_doc": 16000})
    print(result['wiki_results'])