llm-agent / agent_functions.py
=
Initial commit
d40b9df
from dataset_handler import DatasetHandler
from typing import List, Dict, Optional
# Module-level handler instance used by the agent functions below;
# it is constructed with use_streaming=True.
dataset_handler = DatasetHandler(use_streaming=True)
def search_agricultural_documents(keyword: str, limit: int = 5) -> str:
    """
    Search for agricultural research documents by keyword.

    This function searches the CGIAR dataset for documents containing the
    specified keyword in their title, abstract, or keywords. Use this when the
    user asks about specific agricultural topics, crops, techniques, or
    concepts. The lookup is delegated to ``dataset_handler.search_by_keyword``
    and each hit is rendered with ``dataset_handler.format_document_summary``.

    Args:
        keyword: The search keyword (e.g., "rice", "pest control",
            "climate adaptation"). Surrounding whitespace is ignored; a blank
            or non-string keyword is rejected without querying the dataset.
        limit: Maximum number of documents to return (default: 5). Values that
            cannot be read as an integer fall back to 5; values below 1 are
            raised to 1.

    Returns:
        A formatted string containing information about matching documents,
        or a human-readable message when the keyword is blank, nothing
        matched, or the search failed. Exceptions raised during the search
        are reported in the returned text rather than propagated.
    """
    # Reject blank input up front so a meaningless query never reaches the
    # dataset handler.
    if not isinstance(keyword, str) or not keyword.strip():
        return "Please provide a non-empty search keyword (e.g., 'rice', 'pest control')."
    keyword = keyword.strip()

    # Arguments are presumably supplied by the LLM agent and may arrive as
    # strings or out-of-range numbers, so normalise before using them.
    try:
        limit = int(limit)
    except (TypeError, ValueError, OverflowError):
        limit = 5
    limit = max(limit, 1)

    try:
        print(f"[FUNCTION] Searching for '{keyword}' (limit: {limit})...")
        results = dataset_handler.search_by_keyword(keyword, limit)
        if not results:
            return (
                f"No documents found matching '{keyword}' after searching the dataset. "
                "The search may have been limited due to network timeouts. "
                "Try a different search term or a more specific keyword."
            )

        entries = [
            f"{i}. {dataset_handler.format_document_summary(doc)}\n\n"
            for i, doc in enumerate(results, 1)
        ]
        return f"Found {len(results)} document(s) matching '{keyword}':\n\n" + "".join(entries)
    except Exception as e:
        error_msg = str(e)
        lowered = error_msg.lower()
        # A TimeoutError may carry an empty message, so check the type as
        # well as the text.
        if isinstance(e, TimeoutError) or "timeout" in lowered or "timed out" in lowered:
            return (
                "Search timed out while accessing the dataset. "
                "This can happen when the dataset is under heavy load. "
                "Please try again in a moment or use a more specific search term."
            )
        return f"Error searching documents: {error_msg}"
def get_document_details(title: str) -> str:
    """
    Get detailed information about a specific document by its title.

    Use this function when the user asks for more details about a specific
    research paper or document that was mentioned in previous search results.
    The document is fetched with ``dataset_handler.get_document_by_title`` and
    summarised with ``dataset_handler.format_document_summary``.

    Args:
        title: The exact title of the document. A blank or non-string title
            is rejected without querying the dataset.

    Returns:
        Detailed information about the document, including a chapter count
        with up to the first five chapter headings and a figure/table count
        when the document has them. Returns a human-readable message instead
        when the title is blank, the document is not found, or the lookup
        raises.
    """
    # Nothing can match a blank title, so skip the dataset lookup entirely.
    if not isinstance(title, str) or not title.strip():
        return "Please provide the title of the document you want details for."

    try:
        # The title is passed through unchanged: the lookup is documented as
        # taking the exact title.
        doc = dataset_handler.get_document_by_title(title)
        if not doc:
            return f"Document with title '{title}' not found. Please check the title and try again."

        parts = [
            "**Document Details:**\n\n",
            dataset_handler.format_document_summary(doc),
        ]

        # Add chapter information
        chapters = doc.get('chapters')
        if chapters:
            parts.append(f"\n**Chapters:** {len(chapters)} chapters found\n")
            for i, chapter in enumerate(chapters[:5], 1):  # Show first 5 chapters
                # Chapters are presumably dicts with a 'head' key (verify
                # against the dataset schema); tolerate other shapes and
                # missing/empty headings so one bad entry does not discard
                # the whole response.
                if isinstance(chapter, dict):
                    heading = chapter.get('head') or 'Untitled'
                else:
                    heading = str(chapter)
                parts.append(f" {i}. {heading}\n")

        # Add figures information
        figures = doc.get('figures')
        if figures:
            parts.append(f"\n**Figures/Tables:** {len(figures)} found\n")

        return "".join(parts)
    except Exception as e:
        return f"Error retrieving document: {str(e)}"
def browse_topics(topic: Optional[str] = None) -> str:
    """
    Browse agricultural research documents by topic.

    Common topics include: crop management, pest control, climate adaptation,
    farming systems, soil management, water management, sustainable agriculture,
    small-scale farming, agricultural extension, food security.

    With a topic, up to 5 documents are requested from
    ``dataset_handler.search_by_topic``; without one, 3 documents are requested
    from ``dataset_handler.get_random_documents``.

    Args:
        topic: Optional specific topic to browse. If None, empty, or only
            whitespace, random sample documents are returned instead.

    Returns:
        Information about documents related to the topic (or a random sample),
        or a human-readable message when no documents are available or the
        lookup raises.
    """
    try:
        # Treat whitespace-only input like "no topic" rather than searching
        # for a blank string.
        wanted = topic.strip() if isinstance(topic, str) else ""

        if wanted:
            results = dataset_handler.search_by_topic(wanted, limit=5)
            if not results:
                return f"No documents found for topic '{wanted}'. Try a different topic."
            header = f"Documents related to '{wanted}':\n\n"
        else:
            results = dataset_handler.get_random_documents(limit=3)
            # Without this check an empty sample would yield a bare header.
            if not results:
                return "No sample documents are available right now. Try browsing a specific topic."
            header = "Sample agricultural research documents:\n\n"

        entries = [
            f"{i}. {dataset_handler.format_document_summary(doc)}\n\n"
            for i, doc in enumerate(results, 1)
        ]
        return header + "".join(entries)
    except Exception as e:
        return f"Error browsing topics: {str(e)}"
def get_dataset_info() -> str:
    """
    Get information about the dataset.

    Calls ``dataset_handler.load_dataset()`` first if the handler reports it
    is not loaded. In streaming mode a fixed approximate document count is
    shown; otherwise the count is ``len(dataset_handler.dataset)``.

    Returns:
        Information about the CGIAR dataset as Markdown-style text. The
        closing note about streaming mode is included only when the handler
        is actually in streaming mode. If anything raises, an error message
        string is returned instead.
    """
    try:
        if not dataset_handler.loaded:
            dataset_handler.load_dataset()

        streaming = dataset_handler.use_streaming
        if streaming:
            total_docs = "45,232+ (streaming mode)"
        else:
            total_docs = f"{len(dataset_handler.dataset):,}"

        lines = [
            "**CGIAR Agricultural Research Dataset**",
            f"This dataset contains {total_docs} agricultural research publications from CGIAR,",
            "specifically processed for AI applications in agricultural advisory services.",
            "**Dataset Features:**",
            "- Comprehensive collection of agricultural research papers",
            "- Topics include: crop management, pest control, climate adaptation, farming systems,",
            "soil management, water management, sustainable agriculture, and more",
            "- Documents are structured with metadata, abstracts, keywords, chapters, and figures",
            "- Focus on small-scale producer contexts in low and middle-income countries",
            "**Source:** GARDIAN (CGIAR's agri-food data hub)",
            "**License:** CC-BY-4.0",
        ]
        # Only claim streaming mode when it is really in use; previously the
        # note was emitted unconditionally and contradicted the exact count
        # shown in non-streaming mode.
        if streaming:
            lines.append("**Note:** Dataset is loaded in streaming mode for faster access.")

        return "\n".join(lines) + "\n"
    except Exception as e:
        return f"Error getting dataset info: {str(e)}"
def _tool_entry(function, description, properties, required):
    """Build one registry entry: the callable, its description, and an object-type parameter schema."""
    return {
        "function": function,
        "description": description,
        "parameters": {
            "type": "object",
            "properties": properties,
            "required": required,
        },
    }


# Registry of the functions exposed to the LLM agent, keyed by function name.
AVAILABLE_FUNCTIONS = {
    "search_agricultural_documents": _tool_entry(
        search_agricultural_documents,
        "Search for agricultural research documents by keyword. Use when user asks about specific topics, crops, or agricultural concepts.",
        {
            "keyword": {
                "type": "string",
                "description": "The search keyword (e.g., 'rice', 'pest control', 'climate adaptation')",
            },
            "limit": {
                "type": "integer",
                "description": "Maximum number of documents to return (default: 5)",
                "default": 5,
            },
        },
        ["keyword"],
    ),
    "get_document_details": _tool_entry(
        get_document_details,
        "Get detailed information about a specific document by its exact title. Use when user asks for more details about a specific paper.",
        {
            "title": {
                "type": "string",
                "description": "The exact title of the document",
            },
        },
        ["title"],
    ),
    "browse_topics": _tool_entry(
        browse_topics,
        "Browse documents by agricultural topic or get random sample documents. Common topics: crop management, pest control, climate adaptation, farming systems, etc.",
        {
            "topic": {
                "type": "string",
                "description": "Optional specific topic to browse. If not provided, returns random documents.",
            },
        },
        [],
    ),
    "get_dataset_info": _tool_entry(
        get_dataset_info,
        "Get information about the CGIAR dataset itself. Use when user asks about the dataset, its size, or what it contains.",
        {},
        [],
    ),
}