Spaces:

cicboy
/

Crypto_Analyst_Agent

Sleeping

App Files Files Community

cicboy committed 21 days ago

Commit

856dc82

1 Parent(s): 6474a95

update sentiment_tool.py

Browse files

Files changed (1) hide show

tools/sentiment_tool.py +251 -98

tools/sentiment_tool.py CHANGED Viewed

@@ -1,149 +1,302 @@
 import os
 import requests
 from crewai.tools import BaseTool
 from openai import OpenAI
-from typing import Type
 from pydantic import BaseModel, Field
 SERPER_API_KEY = os.getenv("SERPER_API_KEY")
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 client = OpenAI(api_key=OPENAI_API_KEY)
-# ------------------------
-# INPUT SCHEMA
-# ------------------------
 class SentimentInput(BaseModel):
-    query: str = Field(default="bitcoin", description="Cryptocurrency name to evaluate sentiment for.")
-# ------------------------
-# SENTIMENT TOOL
-# ------------------------
 class SentimentTool(BaseTool):
     name: str = "get_crypto_sentiment"
     description: str = (
-        "Fetches recent cryptocurrency news and Reddit discussions using Serper.dev, "
-        "then performs sentiment analysis using OpenAI GPT. Returns structured JSON."
     )
-    arg_schema: Type[BaseModel] = SentimentInput
-    def _run(self, query: str = "bitcoin") -> str:
-        # ============================
-        # 1) FETCH NEWS VIA SERPER
-        # ============================
-        news_headlines = []
-        news_error = None
-        try:
-            news_payload = {
-                "q": f"{query} crypto news",
-                "num": 10
-            }
-            headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}
-            news_res = requests.post(
-                "https://google.serper.dev/news",
-                headers=headers,
-                json=news_payload,
-                timeout=10
-            )
-            news_res.raise_for_status()
-            news_json = news_res.json().get("news", [])
-            news_headlines = [n.get("title") for n in news_json if n.get("title")]
-            news_headlines = news_headlines[:10]
-        except Exception as e:
-            news_error = str(e)
-        # ============================
-        # 2) FORCED REDDIT SCRAPING (RELIABLE)
-        # ============================
-        reddit_titles = []
-        reddit_error = None
         try:
-            # Serper search that *forces* Reddit results
-            reddit_payload = {
-                "q": (
-                    f"site:reddit.com/r/cryptocurrency OR "
-                    f"site:reddit.com/r/{query} "
-                    f"{query} discussion latest"
-                ),
                 "num": 10
             }
-            reddit_res = requests.post(
-                "https://google.serper.dev/search",
-                headers=headers,
-                json=reddit_payload,
-                timeout=10
-            )
-            reddit_res.raise_for_status()
-            reddit_json = reddit_res.json()
-            organic_results = reddit_json.get("organic", [])
-            reddit_titles = [
-                item.get("title")
                 for item in organic_results
-                if "reddit.com" in item.get("link", "")
             ]
-            reddit_titles = reddit_titles[:5]
-        except Exception as e:
-            reddit_error = str(e)
-        # ============================
-        # 3) SENTIMENT ANALYSIS
-        # ============================
-        combined_text = (
-            "News Headlines:\n" + "\n".join(news_headlines) +
-            "\n\nReddit Posts:\n" + "\n".join(reddit_titles)
-        )
-        sentiment_prompt = f"""
-You are a cryptocurrency sentiment analyst.
-Based on the following combined news headlines and Reddit discussions, classify the overall sentiment toward "{query}" as **bullish**, **bearish**, or **neutral**.
-Return only valid JSON in this format:
 {{
-  "sentiment": "bullish/bearish/neutral",
-  "reasoning": "short explanation",
-  "news_headlines": [...],
-  "reddit_titles": [...],
-  "news_error": null or string,
-  "reddit_error": null or string
 }}
-CONTENT TO ANALYSE:
--------------------
 {combined_text}
 """
-        try:
             completion = client.chat.completions.create(
                 model="gpt-4.1",
                 messages=[
-                    {"role": "system", "content": "You are a precise sentiment classifier. Respond only with JSON."},
-                    {"role": "user", "content": sentiment_prompt}
-                ],
-                temperature=0.2
             )
-            sentiment_json = completion.choices[0].message.content
-            return sentiment_json
         except Exception as e:
-            # Return structured failure JSON for debugging
-            return {
-                "sentiment": "unknown",
-                "reasoning": "LLM sentiment analysis failed.",
-                "news_headlines": news_headlines,
-                "reddit_titles": reddit_titles,
-                "news_error": news_error,
-                "reddit_error": reddit_error,
-                "llm_error": str(e)
-            }

 import os
+import json
 import requests
+from typing import Type, List
 from crewai.tools import BaseTool
 from openai import OpenAI
 from pydantic import BaseModel, Field
+# -----------------------------
+# Environment variables
+# -----------------------------
 SERPER_API_KEY = os.getenv("SERPER_API_KEY")
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 client = OpenAI(api_key=OPENAI_API_KEY)
+# -----------------------------
+# Input schema
+# -----------------------------
 class SentimentInput(BaseModel):
+    # Argument schema for SentimentTool: a single free-text coin name that
+    # defaults to "bitcoin" when the caller supplies nothing.
+    query: str = Field(
+        default="bitcoin",
+        description="Cryptocurrency name the user is asking about, e.g. 'bitcoin', 'ethereum', 'solana'."
+    )
+# -----------------------------
+# Sentiment Tool
+# -----------------------------
 class SentimentTool(BaseTool):
+    # CrewAI tool: gathers Serper news headlines and r/CryptoMarkets comments
+    # for a coin, then asks an OpenAI chat model to classify the sentiment.
     name: str = "get_crypto_sentiment"
     description: str = (
+        "Fetches recent cryptocurrency news and Reddit discussions for a given coin, "
+        "then returns structured sentiment JSON based on Serper (Google News + "
+        "r/CryptoMarkets comments) and OpenAI analysis."
     )
+    # IMPORTANT: args_schema (not arg_schema) for Pydantic v2 + CrewAI
+    args_schema: Type[BaseModel] = SentimentInput
+    # -----------------------------------------
+    # Helper: dynamic coin keywords via CoinGecko
+    # -----------------------------------------
+    def _coin_keywords(self, coin: str) -> List[str]:
+        """
+        Build the keyword list used to search for and filter Reddit content.
+
+        The list always contains the lower-cased coin name, its no-space
+        variant and its first word. When CoinGecko resolves the coin, its
+        ticker symbol (e.g. "btc") is added. Only when no ticker could be
+        found is the crude three-letter prefix of the name used instead:
+        a prefix such as "bit" matches almost any English text, so it is
+        kept strictly as a last resort.
+
+        Upper-case and "<keyword> price" variants are intentionally not
+        emitted: keyword matching is a case-insensitive substring test, so
+        they can never match anything the plain keyword does not.
+
+        Args:
+            coin: Coin name as typed by the user, e.g. "bitcoin", "Shiba Inu".
+
+        Returns:
+            A sorted, de-duplicated list of lower-case keywords (sorted so the
+            search query built from it is deterministic). Blank input yields
+            ["bitcoin", "btc"].
+        """
+        from urllib.parse import quote  # local import: only this helper needs it
+
+        coin = (coin or "").lower().strip()
+        if not coin:
+            return ["bitcoin", "btc"]
+        # Base name variants
+        keywords = {
+            coin,                     # "bitcoin"
+            coin.replace(" ", ""),    # "shiba inu" -> "shibainu"
+            coin.split()[0],          # first word e.g. "shiba"
+        }
+        symbol = ""
+        # Try to get symbol from CoinGecko (best effort)
+        try:
+            base_url = "https://api.coingecko.com/api/v3"
+            # First attempt: assume user input matches CoinGecko ID.
+            # The value is percent-encoded so characters such as "/" or "?"
+            # in user input cannot change the request path.
+            r = requests.get(f"{base_url}/coins/{quote(coin, safe='')}", timeout=5)
+            if r.status_code != 200:
+                # Fallback: use /search when ID doesn't match
+                sr = requests.get(f"{base_url}/search", params={"query": coin}, timeout=5)
+                if sr.status_code == 200:
+                    results = sr.json().get("coins", [])
+                    first_id = results[0].get("id") if results else None
+                    if first_id:
+                        r = requests.get(
+                            f"{base_url}/coins/{quote(str(first_id), safe='')}",
+                            timeout=5
+                        )
+            if r.status_code == 200:
+                # `or ""` covers a payload that carries an explicit null symbol
+                symbol = (r.json().get("symbol") or "").lower().strip()
+        except Exception:
+            # Deliberate best-effort: if CoinGecko is unreachable or returns
+            # something unexpected, we still have the base keywords.
+            symbol = ""
+        if symbol:
+            keywords.add(symbol)          # "btc"
+        elif len(coin) >= 3:
+            keywords.add(coin[:3])        # crude fallback, e.g. "bit"
+        return sorted(k for k in keywords if k)
+    # -----------------------------------------
+    # Helper: fetch recent news headlines
+    # -----------------------------------------
+    def _fetch_news(self, query: str) -> List[str]:
+        """
+        Fetch recent news headline titles for "<query> crypto" via Serper news.
+
+        Returns:
+            Up to 10 stripped titles. An empty list is returned when
+            SERPER_API_KEY is unset or when the request, the HTTP status
+            check, or the response parsing raises.
+        """
+        if not SERPER_API_KEY:
+            return []
         try:
+            url = "https://google.serper.dev/news"
+            headers = {
+                "X-API-KEY": SERPER_API_KEY,
+                "Content-Type": "application/json"
+            }
+            payload = {
+                "q": f"{query} crypto",
                 "num": 10
             }
+            r = requests.post(url, headers=headers, json=payload, timeout=10)
+            r.raise_for_status()
+            news_items = r.json().get("news", [])
+            # The slice is applied before the title filter, so items without a
+            # title reduce the count instead of being replaced by later items.
+            return [n.get("title", "").strip() for n in news_items[:10] if n.get("title")]
+        except Exception:
+            # Best effort: any failure degrades to "no news"
+            return []
+    # -----------------------------------------
+    # Helper: find recent r/CryptoMarkets posts (last 7 days)
+    # -----------------------------------------
+    def _fetch_reddit_post_urls(self, keywords: List[str]) -> List[str]:
+        """
+        Use Serper search to find r/CryptoMarkets/comments posts in the last 7 days
+        matching the coin keywords.
+
+        The keywords are quoted and OR-ed together, then restricted with a
+        site: filter. Only result links containing "/comments/" are kept.
+
+        Returns:
+            The matching thread URLs (at most 10 results are requested).
+            An empty list is returned when SERPER_API_KEY is unset or when
+            the request or response parsing raises.
+        """
+        if not SERPER_API_KEY:
+            return []
+        try:
+            query_string = " OR ".join(f'"{k}"' for k in keywords)
+            search_query = f"({query_string}) site:reddit.com/r/CryptoMarkets/comments"
+            url = "https://google.serper.dev/search"
+            headers = {
+                "X-API-KEY": SERPER_API_KEY,
+                "Content-Type": "application/json"
+            }
+            payload = {
+                "q": search_query,
+                "num": 10,
+                "tbs": "qdr:w"  # last 7 days
+            }
+            r = requests.post(url, headers=headers, json=payload, timeout=10)
+            r.raise_for_status()
+            organic_results = r.json().get("organic", [])
+            # `or ""` keeps the membership test safe when "link" is missing or null
+            urls = [
+                item.get("link")
                 for item in organic_results
+                if "/comments/" in (item.get("link") or "")
             ]
+            return [u for u in urls if u]
+        except Exception:
+            # Best effort: any failure degrades to "no threads found"
+            return []
+    # -----------------------------------------
+    # Helper: scrape Reddit comments from Serper
+    # -----------------------------------------
+    def _scrape_reddit_comments(self, urls: List[str], keywords: List[str]) -> List[str]:
+        """
+        Use Serper /scrape to pull keyword-matching text blocks from Reddit threads.
+
+        At most three threads are scraped and only the first 20 blocks of each
+        thread are inspected. A block is kept when it is longer than 40
+        characters and contains any keyword (case-insensitive substring
+        match). Scraping stops as soon as 10 blocks have been collected, so
+        no further paid requests are made once the cap is reached.
+
+        Args:
+            urls: Reddit thread URLs; only the first three are used.
+            keywords: Coin keywords a block must mention. Empty keywords are
+                ignored, because an empty string would match every block.
+
+        Returns:
+            Up to 10 matching blocks in the order they were encountered (no
+            ranking is applied). An empty list is returned when
+            SERPER_API_KEY is unset, every scrape failed, or nothing matched.
+        """
+        if not SERPER_API_KEY:
+            return []
+        max_threads = 3      # limit to 3 threads for speed & cost
+        max_blocks = 20      # early blocks only (top of the thread)
+        max_comments = 10
+        min_length = 40      # "not tiny"
+        # Request invariants, built once instead of once per thread.
+        # NOTE(review): the endpoint URL and the {"blocks": [{"text": ...}]}
+        # response shape are taken as-is from the existing code - confirm
+        # against the Serper scrape documentation.
+        url = "https://google.serper.dev/scrape"
+        headers = {
+            "X-API-KEY": SERPER_API_KEY,
+            "Content-Type": "application/json"
+        }
+        needles = [k.lower() for k in keywords if k]
+        comments: List[str] = []
+        for link in urls[:max_threads]:
+            if len(comments) >= max_comments:
+                # Cap already reached: further scrapes could not change the result
+                break
+            try:
+                r = requests.post(url, headers=headers, json={"url": link}, timeout=10)
+                r.raise_for_status()
+                blocks = r.json().get("blocks", [])
+                texts = [(b.get("text", "") or "").strip() for b in blocks[:max_blocks]]
+                for text in texts:
+                    lower = text.lower()
+                    # basic relevance: contains any coin keyword and is not tiny
+                    if len(text) > min_length and any(k in lower for k in needles):
+                        comments.append(text)
+            except Exception:
+                # Deliberate best-effort: skip any thread that fails to scrape
+                continue
+        return comments[:max_comments]
+    # -----------------------------------------
+    # Main execution
+    # -----------------------------------------
+    def _run(self, query: str = "bitcoin") -> dict:
+        """
+        End-to-end sentiment pipeline:
+        - Build coin keyword set (coin name + ticker via CoinGecko)
+        - Fetch Serper News for the coin
+        - Fetch r/CryptoMarkets posts in last 7 days and scrape comments
+        - Ask OpenAI (gpt-4.1) to return structured JSON sentiment.
+
+        Exceptions raised inside the pipeline are caught and reported in the
+        returned dict rather than propagated.
+
+        Returns:
+            One of:
+            - the parsed model JSON, with "news_headlines" and
+              "reddit_comments" filled in when the model omitted them;
+            - a dict with "sentiment": None and "raw_model_output" when the
+              model reply could not be used as a JSON object;
+            - {"error": ...} when an API key is missing or any step raises.
+        """
+        if not OPENAI_API_KEY:
+            return {"error": "OPENAI_API_KEY missing in environment."}
+        if not SERPER_API_KEY:
+            return {
+                "error": "SERPER_API_KEY missing in environment. "
+                         "Cannot fetch news/reddit sentiment."
+            }
+        try:
+            coin = query.strip()
+            if not coin:
+                coin = "bitcoin"
+            # 1) Build keyword set (coin + ticker)
+            keywords = self._coin_keywords(coin)
+            # 2) Fetch news
+            news_headlines = self._fetch_news(coin)
+            # 3) Fetch & scrape Reddit comments
+            reddit_urls = self._fetch_reddit_post_urls(keywords)
+            reddit_comments = self._scrape_reddit_comments(reddit_urls, keywords)
+            # 4) Build combined context
+            combined_text = (
+                "NEWS HEADLINES:\n"
+                + ("\n".join(f"- {h}" for h in news_headlines) if news_headlines else "None")
+                + "\n\nREDDIT COMMENTS (r/CryptoMarkets):\n"
+                + ("\n".join(f"- {c}" for c in reddit_comments) if reddit_comments else "None")
+            )
+            # 5) Ask OpenAI for structured sentiment JSON
+            prompt = f"""
+You are a crypto sentiment analyst.
+You are given recent NEWS HEADLINES and REDDIT COMMENTS about the coin "{coin}".
+Your job:
+1. Decide whether the overall sentiment is bullish, bearish, or neutral.
+2. Write a short reasoning explaining why, referencing both news and reddit if available.
+3. Return ONLY valid JSON in this exact format:
 {{
+  "sentiment": "bullish" | "bearish" | "neutral",
+  "reasoning": "short explanation tying together news + reddit, if both exist",
+  "news_headlines": [...],         // list of strings, may be empty
+  "reddit_comments": [...]         // list of strings, may be empty
 }}
+Do NOT wrap the JSON in backticks or any extra text.
+Just return the JSON object.
+DATA:
 {combined_text}
 """
             completion = client.chat.completions.create(
                 model="gpt-4.1",
+                temperature=0.2,
                 messages=[
+                    {"role": "system", "content": "You are a precise crypto sentiment classifier."},
+                    {"role": "user",   "content": prompt}
+                ]
             )
+            raw_content = completion.choices[0].message.content.strip()
+            # Try to parse JSON; if it fails, wrap raw content
+            try:
+                parsed = json.loads(raw_content)
+                # Ensure we always attach raw data as well for downstream tools if needed.
+                # setdefault keeps whatever the model echoed back and only fills
+                # missing keys; a non-dict JSON value has no setdefault, so it
+                # raises here and takes the fallback branch below.
+                parsed.setdefault("news_headlines", news_headlines)
+                parsed.setdefault("reddit_comments", reddit_comments)
+                return parsed
+            except Exception:
+                # Fallback: return structured-ish dict with raw model output
+                return {
+                    "sentiment": None,
+                    "reasoning": "Model did not return valid JSON; raw content preserved.",
+                    "news_headlines": news_headlines,
+                    "reddit_comments": reddit_comments,
+                    "raw_model_output": raw_content,
+                }
         except Exception as e:
+            return {"error": f"SentimentTool failed: {str(e)}"}