cwpkd committed on
Commit
c3e03eb
Β·
verified Β·
1 Parent(s): d128b27

Create utils/llm_analyzer.py

Browse files
Files changed (1) hide show
  1. utils/llm_analyzer.py +145 -0
utils/llm_analyzer.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils/llm_analyzer.py
2
+ """
3
+ LLM-based analysis using Gemma model
4
+ """
5
+
6
+ import torch
7
+ from transformers import AutoTokenizer, AutoModelForCausalLM
8
+ from typing import List, Dict
9
+ from config import GEMMA_MODEL, LLM_MAX_LENGTH, LLM_TEMPERATURE, LLM_TOP_P
10
+
11
+
12
class LLMAnalyzer:
    """Analyze and summarize financial news using the Gemma LLM."""

    def __init__(self):
        """Load the Gemma tokenizer and model.

        Uses float16 on CUDA (halves memory) and float32 on CPU;
        device_map="auto" lets accelerate place the weights.
        """
        print("Loading Gemma model...")
        self.tokenizer = AutoTokenizer.from_pretrained(GEMMA_MODEL)
        self.model = AutoModelForCausalLM.from_pretrained(
            GEMMA_MODEL,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto"
        )
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Gemma loaded on {self.device}!")

    def generate_response(self, prompt: str, max_length: int = LLM_MAX_LENGTH) -> str:
        """
        Generate a response from Gemma.

        Args:
            prompt: Input prompt (plain text; Gemma chat markers are added here)
            max_length: Maximum number of new tokens to generate

        Returns:
            Generated text (model turn only, prompt not echoed)
        """
        # Gemma instruction format: a user turn followed by an open model turn.
        formatted_prompt = f"<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"

        inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_length,
                temperature=LLM_TEMPERATURE,
                top_p=LLM_TOP_P,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )

        # BUG FIX: decoding the full sequence with skip_special_tokens=True strips
        # the "<start_of_turn>" marker, so the old split on "<start_of_turn>model"
        # never matched and the echoed prompt was returned along with the answer.
        # Decode only the tokens generated past the prompt instead.
        prompt_length = inputs["input_ids"].shape[1]
        response = self.tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )

        return response.strip()

    def summarize_news(self, articles: List[Dict]) -> str:
        """
        Summarize news articles.

        Args:
            articles: List of article dictionaries; each needs a 'title' and
                may carry a 'summary' (only its first 200 chars are used)

        Returns:
            Brief market summary text (2-3 sentences)
        """
        # Guard: avoid sending an empty-headline prompt to the model.
        if not articles:
            return "No articles available to summarize."

        # Prepare articles text (cap at 5 articles to keep the prompt short)
        articles_text = ""
        for i, article in enumerate(articles[:5], 1):
            articles_text += f"{i}. {article['title']}\n"
            if 'summary' in article:
                articles_text += f"   {article['summary'][:200]}...\n\n"

        prompt = f"""Analyze these financial news headlines and provide a brief market summary (2-3 sentences):

{articles_text}

Summary:"""

        return self.generate_response(prompt, max_length=200)

    def analyze_sentiment_context(self, article: Dict, sentiment_data: Dict) -> str:
        """
        Provide context for a sentiment analysis result.

        Args:
            article: Article dictionary with 'title' and optional 'summary'
            sentiment_data: Sentiment result with 'sentiment_label' and
                'confidence' (fraction in [0, 1], rendered as a percentage)

        Returns:
            Brief explanation (2-3 sentences) of the sentiment call
        """
        sentiment_label = sentiment_data['sentiment_label']
        confidence = sentiment_data['confidence']

        prompt = f"""As a financial analyst, explain why this news headline has a {sentiment_label.lower()} sentiment (confidence: {confidence:.2%}):

Headline: {article['title']}
Summary: {article.get('summary', 'N/A')[:200]}

Provide a brief explanation (2-3 sentences):"""

        return self.generate_response(prompt, max_length=150)

    def generate_investment_insight(self, symbol: str, articles: List[Dict], sentiments: List[Dict]) -> str:
        """
        Generate investment insights based on news and sentiment.

        Args:
            symbol: Stock ticker symbol
            articles: List of article dictionaries (top 3 titles are quoted)
            sentiments: List of sentiment results with 'combined_score' and
                'sentiment_label' keys

        Returns:
            Investment insight text (3-4 sentences)
        """
        # BUG FIX: an empty sentiment list previously raised ZeroDivisionError.
        if not sentiments:
            return f"No sentiment data available for {symbol}."

        # Calculate average sentiment
        avg_sentiment = sum(s['combined_score'] for s in sentiments) / len(sentiments)

        # Count sentiment distribution
        positive = sum(1 for s in sentiments if s['sentiment_label'] == 'Positive')
        negative = sum(1 for s in sentiments if s['sentiment_label'] == 'Negative')
        neutral = len(sentiments) - positive - negative

        # Prepare recent headlines
        headlines = "\n".join([f"- {a['title']}" for a in articles[:3]])

        prompt = f"""As a financial advisor, provide investment insights for {symbol} based on recent news sentiment:

Recent Headlines:
{headlines}

Sentiment Analysis:
- Positive: {positive}/{len(sentiments)}
- Negative: {negative}/{len(sentiments)}
- Neutral: {neutral}/{len(sentiments)}
- Average Score: {avg_sentiment:.2f}

Provide brief investment insights (3-4 sentences):"""

        return self.generate_response(prompt, max_length=250)