Spaces:
Sleeping
Sleeping
| from dataset_handler import DatasetHandler | |
| from typing import List, Dict, Optional | |
# Module-level handler used by the tool functions below; it is constructed
# once, at import time, with streaming enabled.
dataset_handler = DatasetHandler(use_streaming=True)
def search_agricultural_documents(keyword: str, limit: int = 5) -> str:
    """
    Search for agricultural research documents by keyword.

    This function searches the CGIAR dataset for documents containing the specified keyword
    in their title, abstract, or keywords. Use this when the user asks about specific
    agricultural topics, crops, techniques, or concepts.

    The lookup is delegated to ``dataset_handler.search_by_keyword`` and each hit is
    rendered with ``dataset_handler.format_document_summary`` as a numbered list.
    Invalid arguments and failures during the search are reported in the returned
    string rather than raised.

    Args:
        keyword: The search keyword (e.g., "rice", "pest control", "climate adaptation").
            Must contain at least one non-whitespace character.
        limit: Maximum number of documents to return (default: 5). Anything ``int()``
            can convert (e.g. ``"3"``) is accepted; the converted value must be >= 1.

    Returns:
        A formatted string containing information about matching documents, or a
        message explaining why no documents were returned.
    """
    # Arguments come from agent-generated tool calls, so reject values that
    # cannot describe a meaningful search before touching the dataset.
    if not isinstance(keyword, str) or not keyword.strip():
        return "Please provide a non-empty search keyword (e.g., 'rice', 'pest control')."

    try:
        max_results = int(limit)
    except (TypeError, ValueError, OverflowError):
        max_results = 0  # falls through to the invalid-limit message below
    if max_results < 1:
        return f"Invalid limit {limit!r}. Please provide a whole number of at least 1."

    try:
        print(f"[FUNCTION] Searching for '{keyword}' (limit: {max_results})...")
        results = dataset_handler.search_by_keyword(keyword, max_results)
        if not results:
            return f"No documents found matching '{keyword}' after searching the dataset. The search may have been limited due to network timeouts. Try a different search term or a more specific keyword."

        # One numbered entry per document, each followed by a blank line.
        listing = "".join(
            f"{i}. {dataset_handler.format_document_summary(doc)}\n\n"
            for i, doc in enumerate(results, 1)
        )
        return f"Found {len(results)} document(s) matching '{keyword}':\n\n{listing}"
    except Exception as e:
        error_msg = str(e)
        lowered = error_msg.lower()
        # Timeouts get a dedicated, actionable message; everything else is
        # surfaced verbatim so the agent can relay it.
        if "timeout" in lowered or "timed out" in lowered:
            return "Search timed out while accessing the dataset. This can happen when the dataset is under heavy load. Please try again in a moment or use a more specific search term."
        return f"Error searching documents: {error_msg}"
def get_document_details(title: str) -> str:
    """
    Get detailed information about a specific document by its title.

    Use this function when the user asks for more details about a specific research paper
    or document that was mentioned in previous search results.

    The document is fetched with ``dataset_handler.get_document_by_title`` and rendered
    with ``dataset_handler.format_document_summary``. When the document has a non-empty
    ``chapters`` entry, the chapter count and the headings of the first five chapters
    are appended; when it has a non-empty ``figures`` entry, the figure count is appended.
    Failures are reported in the returned string rather than raised.

    Args:
        title: The exact title of the document. Must contain at least one
            non-whitespace character.

    Returns:
        Detailed information about the document including chapters and figures,
        or a message explaining why the document could not be shown.
    """
    # A blank title cannot identify a document; answer without a dataset lookup.
    if not isinstance(title, str) or not title.strip():
        return "Please provide the exact title of the document you want details for."

    try:
        doc = dataset_handler.get_document_by_title(title)
        if not doc:
            return f"Document with title '{title}' not found. Please check the title and try again."

        parts = ["**Document Details:**\n\n", dataset_handler.format_document_summary(doc)]

        # Add chapter information
        chapters = doc.get('chapters')
        if chapters:
            parts.append(f"\n**Chapters:** {len(chapters)} chapters found\n")
            for i, chapter in enumerate(chapters[:5], 1):  # Show first 5 chapters
                # `.get('head', default)` only applies the default when the key is
                # missing; `or` also covers a key that is present but None/empty.
                heading = chapter.get('head') or 'Untitled'
                parts.append(f" {i}. {heading}\n")

        # Add figures information
        figures = doc.get('figures')
        if figures:
            parts.append(f"\n**Figures/Tables:** {len(figures)} found\n")

        return "".join(parts)
    except Exception as e:
        return f"Error retrieving document: {str(e)}"
def browse_topics(topic: Optional[str] = None) -> str:
    """
    Browse agricultural research documents by topic.

    Common topics include: crop management, pest control, climate adaptation,
    farming systems, soil management, water management, sustainable agriculture,
    small-scale farming, agricultural extension, food security.

    With a non-empty ``topic`` the documents come from
    ``dataset_handler.search_by_topic(topic, limit=5)``; otherwise they come from
    ``dataset_handler.get_random_documents(limit=3)``. Either way each document is
    rendered with ``dataset_handler.format_document_summary`` as a numbered list.
    Failures are reported in the returned string rather than raised.

    Args:
        topic: Optional specific topic to browse. If None (or empty), returns random documents.

    Returns:
        Information about documents related to the topic, or a message
        explaining that nothing could be shown.
    """
    try:
        if topic:
            results = dataset_handler.search_by_topic(topic, limit=5)
            if not results:
                return f"No documents found for topic '{topic}'. Try a different topic."
            header = f"Documents related to '{topic}':\n\n"
        else:
            results = dataset_handler.get_random_documents(limit=3)
            # Mirror the topic branch: without this check an empty sample would
            # produce a bare header with no documents under it.
            if not results:
                return "No sample documents are available right now. Please try again later."
            header = "Sample agricultural research documents:\n\n"

        # Both branches share the same numbered-list rendering.
        listing = "".join(
            f"{i}. {dataset_handler.format_document_summary(doc)}\n\n"
            for i, doc in enumerate(results, 1)
        )
        return header + listing
    except Exception as e:
        return f"Error browsing topics: {str(e)}"
def get_dataset_info() -> str:
    """
    Get information about the dataset.

    Calls ``dataset_handler.load_dataset()`` first if the handler reports that it is
    not loaded yet, then renders a Markdown description of the dataset. The document
    count and the closing note both depend on ``dataset_handler.use_streaming``.
    Failures are reported in the returned string rather than raised.

    Returns:
        Information about the CGIAR dataset
    """
    try:
        if not dataset_handler.loaded:
            dataset_handler.load_dataset()

        if dataset_handler.use_streaming:
            # In streaming mode a fixed figure is reported instead of len();
            # presumably the streamed dataset has no length - TODO confirm.
            total_docs = "45,232+ (streaming mode)"
            mode_note = "Dataset is loaded in streaming mode for faster access."
        else:
            total_docs = f"{len(dataset_handler.dataset):,}"
            # Do not claim streaming mode when the exact length was just read.
            mode_note = "Dataset is fully loaded (non-streaming mode)."

        return f"""**CGIAR Agricultural Research Dataset**
This dataset contains {total_docs} agricultural research publications from CGIAR,
specifically processed for AI applications in agricultural advisory services.
**Dataset Features:**
- Comprehensive collection of agricultural research papers
- Topics include: crop management, pest control, climate adaptation, farming systems,
soil management, water management, sustainable agriculture, and more
- Documents are structured with metadata, abstracts, keywords, chapters, and figures
- Focus on small-scale producer contexts in low and middle-income countries
**Source:** GARDIAN (CGIAR's agri-food data hub)
**License:** CC-BY-4.0
**Note:** {mode_note}
"""
    except Exception as e:
        return f"Error getting dataset info: {str(e)}"
def _tool_entry(function, description, properties, required):
    """Build one registry record: the callable, its description, and an
    object-type parameter schema with the given properties and required list."""
    return {
        "function": function,
        "description": description,
        "parameters": {
            "type": "object",
            "properties": properties,
            "required": required,
        },
    }


# Registry of the functions exposed to the LLM agent, keyed by function name.
# Each record pairs the Python callable with the description and parameter
# schema advertised to the agent.
AVAILABLE_FUNCTIONS = {
    "search_agricultural_documents": _tool_entry(
        search_agricultural_documents,
        "Search for agricultural research documents by keyword. Use when user asks about specific topics, crops, or agricultural concepts.",
        {
            "keyword": {
                "type": "string",
                "description": "The search keyword (e.g., 'rice', 'pest control', 'climate adaptation')",
            },
            "limit": {
                "type": "integer",
                "description": "Maximum number of documents to return (default: 5)",
                "default": 5,
            },
        },
        ["keyword"],
    ),
    "get_document_details": _tool_entry(
        get_document_details,
        "Get detailed information about a specific document by its exact title. Use when user asks for more details about a specific paper.",
        {
            "title": {
                "type": "string",
                "description": "The exact title of the document",
            },
        },
        ["title"],
    ),
    "browse_topics": _tool_entry(
        browse_topics,
        "Browse documents by agricultural topic or get random sample documents. Common topics: crop management, pest control, climate adaptation, farming systems, etc.",
        {
            "topic": {
                "type": "string",
                "description": "Optional specific topic to browse. If not provided, returns random documents.",
            },
        },
        [],
    ),
    "get_dataset_info": _tool_entry(
        get_dataset_info,
        "Get information about the CGIAR dataset itself. Use when user asks about the dataset, its size, or what it contains.",
        {},
        [],
    ),
}