Spaces:
Sleeping
Sleeping
refactor: Reduce verbosity in agent outputs and enhance message formatting in chat interface
b58981e
| from typing import List, Dict, Any | |
| from smolagents import CodeAgent, InferenceClientModel, tool | |
| import os | |
| import json | |
| from pathlib import Path | |
| from datetime import datetime | |
# Category table used by the keyword-based categoriser.
#
# Maps a stable category key to:
#   "name"        - human-readable label stored on bookmarks as "category_name"
#   "description" - short explanation of what belongs in the category
#   "keywords"    - lowercase terms matched as substrings of a bookmark's
#                   title and URL when scoring the category
# Insertion order matters: when two categories score equally, the one listed
# first wins in categorize_bookmark.
AI_CATEGORIES = {
    "research_breakthroughs": {
        "name": "Research & Breakthroughs",
        "description": "Novel papers, theoretical advances, new architectures, state-of-the-art results.",
        "keywords": [
            "paper", "arxiv", "research", "breakthrough", "novel", "theory",
            "architecture", "state-of-the-art", "sota", "academic", "study",
            "findings", "discovery",
        ],
    },
    "model_releases": {
        "name": "Model Releases & Updates",
        "description": "Launches of new large-language or vision models, version upgrades, open-source checkpoints.",
        "keywords": [
            "model", "release", "launch", "gpt", "llm", "vision", "checkpoint",
            "open-source", "version", "update", "huggingface", "anthropic",
            "openai", "google", "meta",
        ],
    },
    "tools_frameworks": {
        "name": "Tools, Frameworks & Platforms",
        "description": "SDKs, libraries, cloud services, developer toolkits, hosting/serving solutions.",
        "keywords": [
            "sdk", "library", "framework", "platform", "toolkit", "api",
            "cloud", "hosting", "serving", "deployment", "infrastructure",
            "docker", "kubernetes", "aws", "azure", "gcp",
        ],
    },
    "applications_industry": {
        "name": "Applications & Industry Use Cases",
        "description": "AI in healthcare, finance, manufacturing, marketing, robotics—real-world deployments.",
        "keywords": [
            "healthcare", "finance", "manufacturing", "marketing", "robotics",
            "deployment", "use-case", "industry", "application", "real-world",
            "production", "enterprise", "business",
        ],
    },
    "regulation_ethics": {
        "name": "Regulation, Ethics & Policy",
        "description": "Government guidelines, ethical debates, bias/fairness studies, compliance news.",
        "keywords": [
            "regulation", "ethics", "policy", "government", "guidelines",
            "bias", "fairness", "compliance", "law", "legal", "governance",
            "responsible", "ai-safety", "alignment",
        ],
    },
    "investment_funding": {
        "name": "Investment, Funding & M&A",
        "description": "Venture rounds, strategic investments, acquisitions, startup valuations.",
        "keywords": [
            "investment", "funding", "venture", "acquisition", "m&a",
            "startup", "valuation", "series", "round", "investor", "vc",
            "private-equity", "ipo", "financing",
        ],
    },
    "benchmarks_leaderboards": {
        "name": "Benchmarks & Leaderboards",
        "description": "Performance comparisons, academic/industry challenges, leaderboard standings.",
        "keywords": [
            "benchmark", "leaderboard", "performance", "comparison",
            "evaluation", "metric", "score", "ranking", "competition",
            "challenge", "test", "dataset",
        ],
    },
    "community_events": {
        "name": "Community, Events & Education",
        "description": "Conferences, workshops, hackathons, courses, tutorials, webinars.",
        "keywords": [
            "conference", "workshop", "hackathon", "course", "tutorial",
            "webinar", "education", "community", "event", "meetup",
            "training", "learning", "certification",
        ],
    },
    "security_privacy": {
        "name": "Security, Privacy & Safety",
        "description": "Adversarial attacks, defensive techniques, data-privacy breakthroughs, AI safety research.",
        "keywords": [
            "security", "privacy", "safety", "adversarial", "attack",
            "defense", "vulnerability", "protection", "encryption",
            "data-privacy", "gdpr", "cybersecurity",
        ],
    },
    "market_trends": {
        "name": "Market Trends & Analysis",
        "description": "Adoption rates, market forecasts, analyst reports, surveys on AI usage.",
        "keywords": [
            "market", "trends", "analysis", "forecast", "survey", "adoption",
            "report", "analyst", "growth", "statistics", "usage", "metrics",
            "insights",
        ],
    },
}
def get_cache_file_path():
    """Return the bookmark cache file path as a string.

    The cache lives in a ``data`` directory relative to the current working
    directory; that directory is created on demand if it does not exist yet.
    """
    cache_dir = Path("data")
    # exist_ok keeps repeated calls from failing once the folder is there.
    cache_dir.mkdir(exist_ok=True)
    cache_path = cache_dir.joinpath("ai_bookmarks_cache.json")
    return str(cache_path)
def load_cache():
    """Load the bookmark cache from its JSON file.

    Returns:
        The cache dictionary read from disk. When the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object, a fresh
        empty cache ``{"bookmarks": [], "last_updated": None}`` is returned
        instead so callers can always use ``cache.get(...)``.
    """
    cache_file = get_cache_file_path()
    try:
        with open(cache_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # No cache yet is the normal first-run state, so stay silent.
        return {"bookmarks": [], "last_updated": None}
    except Exception as e:
        # Best-effort loader: report the problem and fall back to an empty cache.
        print(f"Error loading cache: {e}")
        return {"bookmarks": [], "last_updated": None}

    if not isinstance(data, dict):
        # Valid JSON that is not an object (e.g. a list) would break every
        # caller that does cache.get(...), so treat it like a corrupt cache.
        print(f"Error loading cache: expected a JSON object, got {type(data).__name__}")
        return {"bookmarks": [], "last_updated": None}
    return data
def save_cache(cache_data):
    """Save the bookmark cache to its JSON file.

    The data is serialized before any file is opened and written to a
    temporary sibling file that then replaces the real cache, so a
    serialization or write failure never leaves a truncated cache behind.

    Args:
        cache_data: JSON-serializable cache content to persist.

    Returns:
        True when the cache was written, False when saving failed (the error
        is printed).
    """
    cache_file = get_cache_file_path()
    temp_file = f"{cache_file}.tmp"
    try:
        # Serialize first: a non-serializable value must not touch the disk.
        serialized = json.dumps(cache_data, indent=2, ensure_ascii=False)
        with open(temp_file, "w", encoding="utf-8") as f:
            f.write(serialized)
        # Swap the finished file into place in a single step.
        os.replace(temp_file, cache_file)
        return True
    except Exception as e:
        # Callers rely on the boolean result, so report instead of raising.
        print(f"Error saving cache: {e}")
        try:
            os.remove(temp_file)
        except OSError:
            # Nothing to clean up (or cleanup itself failed); the original
            # cache file is untouched either way.
            pass
        return False
def categorize_bookmark(bookmark: Dict[str, Any]) -> str:
    """
    Categorizes a single bookmark based on title and URL using keyword matching.

    Each category scores one point per keyword occurrence in the combined
    title and URL text, plus two bonus points for every keyword that appears
    in the title. Matching is case-insensitive substring matching.

    Args:
        bookmark: Dictionary containing bookmark data with title and url fields.
            Missing or null fields are treated as empty text.

    Returns:
        String key of the most likely category, or 'uncategorized' if no match found.
        On a tie the category listed first in AI_CATEGORIES wins.
    """
    # `or ""` also covers keys that are present but null in the cache JSON.
    title = str(bookmark.get("title") or "").lower()
    url = str(bookmark.get("url") or "").lower()
    text_to_analyze = f"{title} {url}"

    best_category = "uncategorized"
    best_score = 0
    for category_key, category_data in AI_CATEGORIES.items():
        score = 0
        for keyword in category_data["keywords"]:
            needle = keyword.lower()
            # Count occurrences of the keyword across title and URL
            score += text_to_analyze.count(needle)
            # Bonus when the keyword appears in the title
            if needle in title:
                score += 2
        # Strict comparison keeps the earliest category on ties and leaves
        # "uncategorized" in place when nothing scores above zero.
        if score > best_score:
            best_category = category_key
            best_score = score
    return best_category
def categorize_all_bookmarks() -> Dict[str, Any]:
    """
    Categorizes all bookmarks in the cache and adds category information to each bookmark.
    Updates the cache file with categorized bookmarks.

    Bookmarks flagged as manually categorized (see recategorize_bookmark) keep
    their category as long as it is still a known one; they are only counted
    in the statistics, not re-scored.

    Returns:
        Dictionary with categorization results and statistics.
    """
    try:
        cache = load_cache()
        bookmarks = cache.get("bookmarks", [])
        if not bookmarks:
            return {"status": "error", "message": "No bookmarks found in cache"}

        # Start every known bucket at zero so the breakdown lists all categories.
        category_stats = {category_key: 0 for category_key in AI_CATEGORIES}
        category_stats["uncategorized"] = 0
        categorized_count = 0

        for bookmark in bookmarks:
            existing = bookmark.get("category")
            keep_manual = (
                bookmark.get("manually_categorized")
                and isinstance(existing, str)
                and existing in category_stats
            )
            if keep_manual:
                # Respect the user's explicit choice instead of overwriting it.
                category = existing
            else:
                category = categorize_bookmark(bookmark)
                bookmark["category"] = category
                bookmark["category_name"] = AI_CATEGORIES.get(category, {}).get("name", "Uncategorized")
            category_stats[category] += 1
            if category != "uncategorized":
                categorized_count += 1

        # Update cache with categorized bookmarks
        cache["bookmarks"] = bookmarks
        cache["last_categorized"] = datetime.now().isoformat()
        cache["categorization_stats"] = category_stats

        if not save_cache(cache):
            return {"status": "error", "message": "Failed to save categorized bookmarks to cache"}
        return {
            "status": "success",
            "message": f"Successfully categorized {categorized_count} out of {len(bookmarks)} bookmarks",
            "total_bookmarks": len(bookmarks),
            "categorized_bookmarks": categorized_count,
            "uncategorized_bookmarks": category_stats["uncategorized"],
            "category_breakdown": category_stats,
        }
    except Exception as e:
        # Tool boundary: report failures as a result dictionary, never raise.
        return {"status": "error", "message": f"Error categorizing bookmarks: {str(e)}"}
def get_bookmarks_by_category(category: str) -> List[Dict[str, Any]]:
    """
    Returns every cached bookmark assigned to the requested category.

    Args:
        category: Category key (e.g., 'research_breakthroughs'), category name (e.g., 'Research & Breakthroughs', matched case-insensitively), or 'uncategorized'.

    Returns:
        List of bookmarks in the specified category; empty when the cache has
        no bookmarks or the category is unknown. Bookmarks without a stored
        category count as 'uncategorized'.
    """
    stored = load_cache().get("bookmarks", [])
    if not stored:
        return []

    # Resolve the request to a category key: exact key first, then display name.
    resolved_key = category if category in AI_CATEGORIES else None
    if resolved_key is None:
        for candidate_key, info in AI_CATEGORIES.items():
            if info["name"].lower() == category.lower():
                resolved_key = candidate_key
                break

    wants_uncategorized = category.lower() == "uncategorized"
    if not resolved_key and not wants_uncategorized:
        return []

    def belongs(entry):
        assigned = entry.get("category", "uncategorized")
        if resolved_key and assigned == resolved_key:
            return True
        return wants_uncategorized and assigned == "uncategorized"

    return [entry for entry in stored if belongs(entry)]
def get_category_statistics() -> Dict[str, Any]:
    """
    Gets statistics about bookmark categorization.

    Bookmarks whose stored category is missing, null, or not one of the
    current category keys are counted as uncategorized instead of raising.

    Returns:
        Dictionary with categorization statistics and category information,
        or {"error": ...} when the cache holds no bookmarks.
    """
    cache = load_cache()
    bookmarks = cache.get("bookmarks", [])
    if not bookmarks:
        return {"error": "No bookmarks found in cache"}

    total = len(bookmarks)  # non-zero here, so the divisions below are safe

    # Calculate current category distribution
    category_counts = {category_key: 0 for category_key in AI_CATEGORIES}
    category_counts["uncategorized"] = 0
    for bookmark in bookmarks:
        category = bookmark.get("category", "uncategorized")
        if not isinstance(category, str) or category not in category_counts:
            # Stale or malformed category values must not crash the report.
            category = "uncategorized"
        category_counts[category] += 1

    uncategorized_bookmarks = category_counts["uncategorized"]
    categorized_bookmarks = total - uncategorized_bookmarks

    # Prepare detailed category info
    category_details = {
        key: {
            "name": data["name"],
            "description": data["description"],
            "count": category_counts[key],
            "percentage": round((category_counts[key] / total) * 100, 2),
        }
        for key, data in AI_CATEGORIES.items()
    }

    return {
        "total_bookmarks": total,
        "categorized_bookmarks": categorized_bookmarks,
        "uncategorized_bookmarks": uncategorized_bookmarks,
        "categorization_rate": round((categorized_bookmarks / total) * 100, 2),
        "last_categorized": cache.get("last_categorized"),
        "category_details": category_details,
        "available_categories": list(AI_CATEGORIES.keys()),
    }
def recategorize_bookmark(bookmark_id: str, new_category: str) -> Dict[str, Any]:
    """
    Assigns a category to one bookmark by hand and persists the change.

    Args:
        bookmark_id: ID of the bookmark to recategorize
        new_category: New category key (e.g., 'research_breakthroughs') or 'uncategorized'

    Returns:
        Dictionary with a "status" of "success" or "error" and a "message";
        on success it also carries the bookmark title and the old and new
        category.
    """
    try:
        cache = load_cache()
        # Only the first bookmark carrying the requested ID is considered.
        target = next(
            (entry for entry in cache.get("bookmarks", []) if entry.get("id") == bookmark_id),
            None,
        )
        if target is None:
            return {"status": "error", "message": f"Bookmark with ID '{bookmark_id}' not found"}

        # The category is validated only once the bookmark is known to exist.
        if new_category != "uncategorized" and new_category not in AI_CATEGORIES:
            return {"status": "error", "message": f"Invalid category: {new_category}"}

        previous_category = target.get("category", "uncategorized")
        target["category"] = new_category
        target["category_name"] = AI_CATEGORIES.get(new_category, {}).get("name", "Uncategorized")
        target["manually_categorized"] = True
        target["recategorized_at"] = datetime.now().isoformat()

        if not save_cache(cache):
            return {"status": "error", "message": "Failed to save recategorized bookmark"}
        return {
            "status": "success",
            "message": f"Bookmark '{target.get('title', 'Unknown')}' recategorized from '{previous_category}' to '{new_category}'",
            "bookmark_title": target.get("title"),
            "old_category": previous_category,
            "new_category": new_category,
        }
    except Exception as e:
        return {"status": "error", "message": f"Error recategorizing bookmark: {str(e)}"}
def get_uncategorized_bookmarks() -> List[Dict[str, Any]]:
    """
    Returns the cached bookmarks that have no category yet.

    A bookmark counts as uncategorized when its "category" field is absent or
    equal to "uncategorized".

    Returns:
        List of uncategorized bookmarks.
    """
    stored = load_cache().get("bookmarks", [])
    return [
        entry
        for entry in stored
        if entry.get("category", "uncategorized") == "uncategorized"
    ]
def search_bookmarks_by_category_and_query(category: str, query: str) -> List[Dict[str, Any]]:
    """
    Search bookmarks within a specific category using a query.

    The query is matched case-insensitively as a substring of each bookmark's
    title or URL. Bookmarks whose title or URL is missing or null are treated
    as having empty text rather than causing an error.

    Args:
        category: Category key or name to search within
        query: Search term to find in bookmark titles or URLs

    Returns:
        List of matching bookmarks within the specified category.
    """
    # First get bookmarks by category
    category_bookmarks = get_bookmarks_by_category(category)
    if not category_bookmarks:
        return []

    query_lower = query.lower()

    def matches(bookmark):
        # `or ""` covers fields stored as null in the cache JSON.
        title = str(bookmark.get("title") or "").lower()
        url = str(bookmark.get("url") or "").lower()
        return query_lower in title or query_lower in url

    # Then search within those bookmarks
    return [bookmark for bookmark in category_bookmarks if matches(bookmark)]
# Instantiate the Categoriser CodeAgent.
# This runs at import time, so importing the module builds the agent and
# requires the HF_TOKEN environment variable to be set.
# NOTE(review): `tool` is imported at the top of the file, but no `@tool`
# decorator is visible on the functions listed under `tools` in this view —
# confirm they are registered in the form CodeAgent expects.
categoriser_agent = CodeAgent(
    model=InferenceClientModel(
        provider="nebius",
        # Raises KeyError during import when HF_TOKEN is not defined.
        token=os.environ["HF_TOKEN"],
    ),
    # Functions from this module exposed to the agent.
    tools=[
        categorize_all_bookmarks,
        get_bookmarks_by_category,
        get_category_statistics,
        recategorize_bookmark,
        get_uncategorized_bookmarks,
        search_bookmarks_by_category_and_query,
    ],
    name="categoriser_agent",
    description="Specializes in categorizing AI news and bookmarks into 10 predefined categories: Research & Breakthroughs, Model Releases & Updates, Tools/Frameworks/Platforms, Applications & Industry Use Cases, Regulation/Ethics/Policy, Investment/Funding/M&A, Benchmarks & Leaderboards, Community/Events/Education, Security/Privacy/Safety, and Market Trends & Analysis. Uses keyword-based categorization and provides tools for managing and searching categorized content.",
    max_steps=10,
    additional_authorized_imports=["json", "datetime", "re", "pathlib"],
    # Reduce verbosity: no streamed output and a 300-character print limit
    # (presumably a cap on captured print output — confirm against smolagents docs).
    stream_outputs=False,
    max_print_outputs_length=300,
)