cicboy committed on
Commit
856dc82
·
1 Parent(s): 6474a95

update sentiment_tool.py

Browse files
Files changed (1) hide show
  1. tools/sentiment_tool.py +251 -98
tools/sentiment_tool.py CHANGED
@@ -1,149 +1,302 @@
1
  import os
 
2
  import requests
 
 
3
  from crewai.tools import BaseTool
4
  from openai import OpenAI
5
- from typing import Type
6
  from pydantic import BaseModel, Field
7
 
 
 
 
8
  SERPER_API_KEY = os.getenv("SERPER_API_KEY")
9
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
10
 
11
  client = OpenAI(api_key=OPENAI_API_KEY)
12
 
13
- # ------------------------
14
- # INPUT SCHEMA
15
- # ------------------------
 
16
  class SentimentInput(BaseModel):
17
- query: str = Field(default="bitcoin", description="Cryptocurrency name to evaluate sentiment for.")
 
 
 
18
 
19
- # ------------------------
20
- # SENTIMENT TOOL
21
- # ------------------------
 
22
  class SentimentTool(BaseTool):
23
  name: str = "get_crypto_sentiment"
24
  description: str = (
25
- "Fetches recent cryptocurrency news and Reddit discussions using Serper.dev, "
26
- "then performs sentiment analysis using OpenAI GPT. Returns structured JSON."
 
27
  )
28
- arg_schema: Type[BaseModel] = SentimentInput
 
29
 
30
def _run(self, query: str = "bitcoin") -> str:
    """
    Fetch news headlines and Reddit thread titles for ``query`` via Serper,
    then ask OpenAI to classify the overall sentiment.

    Args:
        query: Coin name used to build both Serper search queries and the
            sentiment prompt.

    Returns:
        A JSON string. On success this is the model's raw message content.
        If the OpenAI call fails, it is a serialized failure object with
        ``sentiment`` set to ``"unknown"`` plus the collected headlines,
        titles and the three error strings.
    """
    # Local import: the failure payload must be serialized so that every
    # return path honours the declared ``str`` return type.
    import json

    # Built once, outside the try blocks, so the Reddit request never depends
    # on how far the news request got.
    headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}

    # ============================
    # 1) FETCH NEWS VIA SERPER
    # ============================
    news_headlines = []
    news_error = None

    try:
        news_res = requests.post(
            "https://google.serper.dev/news",
            headers=headers,
            json={"q": f"{query} crypto news", "num": 10},
            timeout=10,
        )
        news_res.raise_for_status()

        news_json = news_res.json().get("news", [])
        news_headlines = [n.get("title") for n in news_json if n.get("title")][:10]
    except Exception as e:
        # Best effort: remember the error and continue with no headlines.
        news_error = str(e)

    # ============================
    # 2) REDDIT THREAD TITLES VIA SERPER SEARCH
    # ============================
    reddit_titles = []
    reddit_error = None

    try:
        # Site-restricted search so the organic results are Reddit threads.
        reddit_payload = {
            "q": (
                f"site:reddit.com/r/cryptocurrency OR "
                f"site:reddit.com/r/{query} "
                f"{query} discussion latest"
            ),
            "num": 10,
        }

        reddit_res = requests.post(
            "https://google.serper.dev/search",
            headers=headers,
            json=reddit_payload,
            timeout=10,
        )
        reddit_res.raise_for_status()

        organic_results = reddit_res.json().get("organic", [])

        # Skip results without a title: a None entry would make the
        # "\n".join below raise outside of any try block. A null link is
        # treated as "not a Reddit link" instead of raising.
        reddit_titles = [
            item.get("title")
            for item in organic_results
            if item.get("title") and "reddit.com" in (item.get("link") or "")
        ][:5]
    except Exception as e:
        reddit_error = str(e)

    # ============================
    # 3) SENTIMENT ANALYSIS
    # ============================
    combined_text = (
        "News Headlines:\n" + "\n".join(news_headlines) +
        "\n\nReddit Posts:\n" + "\n".join(reddit_titles)
    )

    sentiment_prompt = f"""
    You are a cryptocurrency sentiment analyst.

    Based on the following combined news headlines and Reddit discussions, classify the overall sentiment toward "{query}" as **bullish**, **bearish**, or **neutral**.

    Return only valid JSON in this format:

    {{
    "sentiment": "bullish/bearish/neutral",
    "reasoning": "short explanation",
    "news_headlines": [...],
    "reddit_titles": [...],
    "news_error": null or string,
    "reddit_error": null or string
    }}

    CONTENT TO ANALYSE:
    -------------------
    {combined_text}
    """

    try:
        completion = client.chat.completions.create(
            model="gpt-4.1",
            messages=[
                {"role": "system", "content": "You are a precise sentiment classifier. Respond only with JSON."},
                {"role": "user", "content": sentiment_prompt},
            ],
            temperature=0.2,
        )
        return completion.choices[0].message.content
    except Exception as e:
        # Structured failure JSON for debugging, serialized so callers always
        # receive a string.
        return json.dumps({
            "sentiment": "unknown",
            "reasoning": "LLM sentiment analysis failed.",
            "news_headlines": news_headlines,
            "reddit_titles": reddit_titles,
            "news_error": news_error,
            "reddit_error": reddit_error,
            "llm_error": str(e),
        })
 
1
  import os
2
+ import json
3
  import requests
4
+ from typing import Type, List
5
+
6
  from crewai.tools import BaseTool
7
  from openai import OpenAI
 
8
  from pydantic import BaseModel, Field
9
 
10
+ # -----------------------------
11
+ # Environment variables
12
+ # -----------------------------
13
  SERPER_API_KEY = os.getenv("SERPER_API_KEY")
14
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
15
 
16
  client = OpenAI(api_key=OPENAI_API_KEY)
17
 
18
+
19
+ # -----------------------------
20
+ # Input schema
21
+ # -----------------------------
22
  class SentimentInput(BaseModel):
23
+ query: str = Field(
24
+ default="bitcoin",
25
+ description="Cryptocurrency name the user is asking about, e.g. 'bitcoin', 'ethereum', 'solana'."
26
+ )
27
 
28
+
29
+ # -----------------------------
30
+ # Sentiment Tool
31
+ # -----------------------------
32
  class SentimentTool(BaseTool):
33
  name: str = "get_crypto_sentiment"
34
  description: str = (
35
+ "Fetches recent cryptocurrency news and Reddit discussions for a given coin, "
36
+ "then returns structured sentiment JSON based on Serper (Google News + "
37
+ "r/CryptoMarkets comments) and OpenAI analysis."
38
  )
39
+ # IMPORTANT: args_schema (not arg_schema) for Pydantic v2 + CrewAI
40
+ args_schema: Type[BaseModel] = SentimentInput
41
 
42
+ # -----------------------------------------
43
+ # Helper: dynamic coin keywords via CoinGecko
44
+ # -----------------------------------------
45
+ def _coin_keywords(self, coin: str) -> List[str]:
46
+ """
47
+ Build a keyword set for matching Reddit comments:
48
+ - coin name
49
+ - no-space version
50
+ - CoinGecko ticker symbol (e.g. btc, eth, sol) when available
51
+ """
52
+ coin = coin.lower().strip()
53
+ keywords = set()
54
 
55
+ if not coin:
56
+ return ["bitcoin", "btc"]
 
 
 
 
57
 
58
+ # Base name variants
59
+ keywords.add(coin) # "bitcoin"
60
+ keywords.add(coin.replace(" ", "")) # "shiba inu" -> "shibainu"
61
+ keywords.add(coin.split()[0]) # first word e.g. "shiba"
62
+ if len(coin) >= 3:
63
+ keywords.add(coin[:3]) # crude fallback, e.g. "bit"
 
64
 
65
+ # Try to get symbol from CoinGecko
66
+ try:
67
+ # First attempt: assume user input matches CoinGecko ID
68
+ cg_url = f"https://api.coingecko.com/api/v3/coins/{coin}"
69
+ r = requests.get(cg_url, timeout=5)
70
+ if r.status_code != 200:
71
+ # Fallback: use /search when ID doesn't match
72
+ search_url = "https://api.coingecko.com/api/v3/search"
73
+ sr = requests.get(search_url, params={"query": coin}, timeout=5)
74
+ if sr.status_code == 200:
75
+ results = sr.json().get("coins", [])
76
+ if results:
77
+ first_id = results[0].get("id")
78
+ if first_id:
79
+ r = requests.get(
80
+ f"https://api.coingecko.com/api/v3/coins/{first_id}",
81
+ timeout=5
82
+ )
83
 
84
+ if r.status_code == 200:
85
+ data = r.json()
86
+ symbol = data.get("symbol", "").lower()
87
+ if symbol:
88
+ keywords.add(symbol) # "btc"
89
+ keywords.add(symbol.upper()) # "BTC"
90
+ keywords.add(symbol + " price")
91
+ keywords.add(coin + " price")
92
+ except Exception:
93
+ # If CoinGecko fails, we still have the base keywords
94
+ pass
95
 
96
+ return list({k for k in keywords if k})
97
+
98
+ # -----------------------------------------
99
+ # Helper: fetch recent news headlines
100
+ # -----------------------------------------
101
def _fetch_news(self, query: str) -> List[str]:
    """
    Return up to 10 stripped news titles for ``query`` from Serper's news
    endpoint.

    Returns an empty list when SERPER_API_KEY is not set or when the request
    or response handling raises.
    """
    if not SERPER_API_KEY:
        return []

    try:
        response = requests.post(
            "https://google.serper.dev/news",
            headers={
                "X-API-KEY": SERPER_API_KEY,
                "Content-Type": "application/json",
            },
            json={"q": f"{query} crypto", "num": 10},
            timeout=10,
        )
        response.raise_for_status()

        titles: List[str] = []
        # Only the first 10 items are inspected; items without a title are dropped.
        for item in response.json().get("news", [])[:10]:
            title = item.get("title")
            if title:
                titles.append(title.strip())
        return titles
    except Exception:
        return []
122
+
123
+ # -----------------------------------------
124
+ # Helper: find recent r/CryptoMarkets posts (last 7 days)
125
+ # -----------------------------------------
126
def _fetch_reddit_post_urls(self, keywords: List[str]) -> List[str]:
    """
    Search Serper for r/CryptoMarkets comment threads from the last 7 days
    that mention any of ``keywords``.

    Returns the result links that contain "/comments/". Returns an empty list
    when SERPER_API_KEY is not set or when the request or response handling
    raises.
    """
    if not SERPER_API_KEY:
        return []

    try:
        # Each keyword is quoted for exact-phrase matching and OR-ed together.
        alternatives = " OR ".join(f'"{term}"' for term in keywords)

        response = requests.post(
            "https://google.serper.dev/search",
            headers={
                "X-API-KEY": SERPER_API_KEY,
                "Content-Type": "application/json",
            },
            json={
                "q": f"({alternatives}) site:reddit.com/r/CryptoMarkets/comments",
                "num": 10,
                "tbs": "qdr:w",  # last 7 days
            },
            timeout=10,
        )
        response.raise_for_status()

        thread_urls = []
        for hit in response.json().get("organic", []):
            link = hit.get("link")
            # Keep only non-empty links that point at a comment thread.
            if link and "/comments/" in link:
                thread_urls.append(link)
        return thread_urls
    except Exception:
        return []
161
 
162
+ # -----------------------------------------
163
+ # Helper: scrape Reddit comments from Serper
164
+ # -----------------------------------------
165
def _scrape_reddit_comments(self, urls: List[str], keywords: List[str]) -> List[str]:
    """
    Scrape text blocks from Reddit threads through Serper and keep the ones
    that mention the coin.

    Only the first 3 URLs are scraped, and only the first 20 blocks of each
    response are inspected. A block is kept when it is longer than 40
    characters and contains at least one keyword (case-insensitive substring
    match).

    Args:
        urls: Thread URLs to scrape, in priority order.
        keywords: Coin keywords; empty entries are ignored.

    Returns:
        The first 10 matching blocks in encounter order. Returns an empty
        list when SERPER_API_KEY is not set. A thread whose scrape fails is
        skipped.
    """
    if not SERPER_API_KEY:
        return []

    max_threads = 3    # limit threads for speed & cost
    max_blocks = 20    # early blocks only
    max_comments = 10
    min_length = 40    # blocks must be strictly longer than this

    # NOTE(review): confirm this endpoint and the "blocks"/"text" response
    # fields against the current Serper scrape API. Any HTTP or parsing
    # failure is skipped silently below, which would leave the result empty.
    endpoint = "https://google.serper.dev/scrape"
    headers = {
        "X-API-KEY": SERPER_API_KEY,
        "Content-Type": "application/json",
    }

    # Lowercase once. Empty keywords are dropped because "" is a substring of
    # every text and would make every block count as relevant.
    needles = [str(k).lower() for k in keywords if k]

    comments: List[str] = []

    for link in urls[:max_threads]:
        if len(comments) >= max_comments:
            # The cap is already reached; further scrape calls cannot change
            # the result.
            break
        try:
            r = requests.post(endpoint, headers=headers, json={"url": link}, timeout=10)
            r.raise_for_status()

            blocks = r.json().get("blocks", [])
            texts = [b.get("text", "") for b in blocks[:max_blocks]]

            for raw in texts:
                text = (raw or "").strip()
                if len(text) <= min_length:
                    continue
                lower = text.lower()
                if any(needle in lower for needle in needles):
                    comments.append(text)
        except Exception:
            # Skip any failed scrape and move on to the next thread.
            continue

    # First matches win; no ranking is applied.
    return comments[:max_comments]
205
+
206
+ # -----------------------------------------
207
+ # Main execution
208
+ # -----------------------------------------
209
def _run(self, query: str = "bitcoin") -> dict:
    """
    Run the end-to-end sentiment pipeline for one coin.

    Steps:
        1. Build the coin keyword list via ``_coin_keywords``.
        2. Fetch news headlines via ``_fetch_news``.
        3. Find Reddit threads and scrape matching comments.
        4. Ask OpenAI (gpt-4.1) for a JSON sentiment verdict.

    Args:
        query: Coin name; empty or missing input falls back to "bitcoin".

    Returns:
        One of three dict shapes:
        - the parsed model JSON, with ``news_headlines`` and
          ``reddit_comments`` filled in from the fetched data if the model
          omitted them;
        - a fallback dict with ``sentiment`` set to ``None`` and the raw text
          under ``raw_model_output`` when the reply is not a JSON object;
        - ``{"error": ...}`` when an API key is missing or any step raises.
    """
    if not OPENAI_API_KEY:
        return {"error": "OPENAI_API_KEY missing in environment."}
    if not SERPER_API_KEY:
        return {
            "error": "SERPER_API_KEY missing in environment. "
                     "Cannot fetch news/reddit sentiment."
        }

    try:
        # Tolerate None as well as blank input.
        coin = (query or "").strip() or "bitcoin"

        # 1) Build keyword set (coin + ticker)
        keywords = self._coin_keywords(coin)

        # 2) Fetch news
        news_headlines = self._fetch_news(coin)

        # 3) Fetch & scrape Reddit comments
        reddit_urls = self._fetch_reddit_post_urls(keywords)
        reddit_comments = self._scrape_reddit_comments(reddit_urls, keywords)

        # 4) Build combined context; "None" marks an empty section.
        news_section = (
            "\n".join(f"- {h}" for h in news_headlines) if news_headlines else "None"
        )
        reddit_section = (
            "\n".join(f"- {c}" for c in reddit_comments) if reddit_comments else "None"
        )
        combined_text = (
            "NEWS HEADLINES:\n"
            + news_section
            + "\n\nREDDIT COMMENTS (r/CryptoMarkets):\n"
            + reddit_section
        )

        # 5) Ask OpenAI for structured sentiment JSON
        prompt = f"""
        You are a crypto sentiment analyst.

        You are given recent NEWS HEADLINES and REDDIT COMMENTS about the coin "{coin}".

        Your job:
        1. Decide whether the overall sentiment is bullish, bearish, or neutral.
        2. Write a short reasoning explaining why, referencing both news and reddit if available.
        3. Return ONLY valid JSON in this exact format:

        {{
        "sentiment": "bullish" | "bearish" | "neutral",
        "reasoning": "short explanation tying together news + reddit, if both exist",
        "news_headlines": [...], // list of strings, may be empty
        "reddit_comments": [...] // list of strings, may be empty
        }}

        Do NOT wrap the JSON in backticks or any extra text.
        Just return the JSON object.

        DATA:
        {combined_text}
        """

        completion = client.chat.completions.create(
            model="gpt-4.1",
            temperature=0.2,
            messages=[
                {"role": "system", "content": "You are a precise crypto sentiment classifier."},
                {"role": "user", "content": prompt},
            ],
        )

        # The message content can be None; treat that as an empty reply so it
        # reaches the structured fallback instead of the generic error.
        raw_content = (completion.choices[0].message.content or "").strip()

        # The model may still wrap its answer in a Markdown code fence
        # (``` or ```json) despite the instruction; unwrap before parsing.
        candidate = raw_content
        if candidate.startswith("```"):
            candidate = candidate.split("\n", 1)[1] if "\n" in candidate else ""
            candidate = candidate.rsplit("```", 1)[0].strip()

        try:
            parsed = json.loads(candidate)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass.
            parsed = None

        if isinstance(parsed, dict):
            # Always expose the fetched data for downstream tools.
            parsed.setdefault("news_headlines", news_headlines)
            parsed.setdefault("reddit_comments", reddit_comments)
            return parsed

        # Fallback: not a JSON object; preserve the raw model output.
        return {
            "sentiment": None,
            "reasoning": "Model did not return valid JSON; raw content preserved.",
            "news_headlines": news_headlines,
            "reddit_comments": reddit_comments,
            "raw_model_output": raw_content,
        }

    except Exception as e:
        # Tool boundary: report the failure as data instead of raising.
        return {"error": f"SentimentTool failed: {str(e)}"}