Inara132000 committed on
Commit
d532801
·
verified ·
1 Parent(s): 230c08d

Update helper.py

Browse files
Files changed (1) hide show
  1. helper.py +24 -234
helper.py CHANGED
@@ -17,310 +17,100 @@ from gtts import gTTS
17
  from huggingface_hub import hf_hub_download
18
  from keras.utils import pad_sequences
19
  from transformers import BertTokenizer
20
-
21
- from app.logger.app_logger import app_logger
22
-
23
-
24
  from selenium import webdriver
25
  from selenium.webdriver.chrome.options import Options
26
  import concurrent.futures
27
 
28
class ChatBot:
    """
    A chatbot class that interacts with a local Llama model using Ollama.
    """

    def __init__(self) -> None:
        """Initialize the ChatBot instance with a conversation history."""
        self.history: List[Dict[str, str]] = [{"role": "system", "content": "You are a helpful assistant."}]
        app_logger.log_info("ChatBot instance initialized", level="INFO")

    def generate_response(self, prompt: str) -> str:
        """
        Generate a response from the chatbot based on the user's prompt.

        Args:
            prompt (str): The input message from the user.

        Returns:
            str: The chatbot's response, or an apology string on failure
            (this method never raises to the caller).
        """
        self.history.append({"role": "user", "content": prompt})
        app_logger.log_info("User prompt added to history", level="INFO")

        # Convert chat history into a single string for subprocess stdin.
        conversation: str = "\n".join(f"{msg['role']}: {msg['content']}" for msg in self.history)

        try:
            # Run the Llama model using Ollama (list form, no shell).
            completion: subprocess.CompletedProcess = subprocess.run(
                ["ollama", "run", "llama3.2:latest"],
                input=conversation,
                capture_output=True,
                text=True,
            )

            if completion.returncode != 0:
                app_logger.log_error(f"Error running subprocess: {completion.stderr}")
                return "I'm sorry, I encountered an issue processing your request."

            response: str = completion.stdout.strip()
            self.history.append({"role": "assistant", "content": response})
            app_logger.log_info("Assistant response generated", level="INFO")

            return response

        except Exception as e:
            app_logger.log_error(f"Error sending query to the model: {e}")
            return "I'm sorry, an error occurred while processing your request."

    async def rate_body_of_article(self, article_title: str, article_content: str) -> str:
        """
        Rate the quality of an article's content based on its title.

        Args:
            article_title (str): The title of the article.
            article_content (str): The full content of the article
                (only the first 1000 characters are sent to the model).

        Returns:
            str: A whole-number rating between 1 and 5, or "Error".
        """
        # NOTE: the example output must stay integer-only — the validation
        # below uses isdigit(), which rejects fractional answers like "3.5".
        prompt: str = f"""
        Given the following article title and content, provide a rating between 1 and 5
        based on how well the content aligns with the title and its overall quality.

        - **Article Title**: {article_title}
        - **Article Content**: {article_content[:1000]}

        **Instructions:**
        - The rating should be a whole number between 1 and 5.
        - Base your score on accuracy, clarity, and relevance.
        - Only return a single numeric value (1-5) with no extra text.

        **Example Output:**
        `4` or `2` or `3`
        """

        try:
            # Run the Llama model using Ollama
            completion: subprocess.CompletedProcess = subprocess.run(
                ["ollama", "run", "llama3.2:latest"],
                input=prompt,
                capture_output=True,
                text=True,
            )

            if completion.returncode != 0:
                app_logger.log_error(f"Error running subprocess: {completion.stderr}")
                return "Error"

            response: str = completion.stdout.strip()

            # Validate the rating is a whole number within the expected range.
            if response.isdigit() and 1 <= int(response) <= 5:
                self.history.append({"role": "assistant", "content": response})
                app_logger.log_info(f"Article rated: {response}", level="INFO")
                return response
            else:
                app_logger.log_warning(f"Invalid rating received: {response}")
                return "Error"

        except Exception as e:
            app_logger.log_error(f"Error sending query to the model: {e}")
            return "Error"

    async def rate_article_credibility(self, article_title: str, article_content: str) -> str:
        """
        Rate the credibility of an article using a locally created model.

        Args:
            article_title (str): The title of the article.
            article_content (str): The full content of the article.
                NOTE(review): currently unused — only the title is fed to
                the model; confirm this is intentional.

        Returns:
            str: The predicted class index as a string, or "Error".
        """
        try:
            # Download (or reuse the cached) Keras model from the Hub.
            model_path: str = hf_hub_download(repo_id="Dkethan/my-tf-nn-model-v2", filename="model.keras")
            new_model = keras.models.load_model(model_path)

            # Load the Hugging Face tokenizer.
            tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

            # Preprocess the input data; sequence length is taken from the
            # model's first input — assumes input_shape[0] is the text input.
            max_length: int = new_model.input_shape[0][1]
            X_text = tokenizer(
                [article_title],  # Tokenize the article title only
                max_length=max_length,
                padding="max_length",
                truncation=True,
                return_tensors="tf"
            )

            # Dummy 'func_rating' input (can be replaced with actual data).
            X_func_rating: np.ndarray = np.array([5]).reshape(-1, 1)

            # Make predictions and take the arg-max class.
            predictions: np.ndarray = new_model.predict(
                {"text_input": X_text["input_ids"], "func_rating_input": X_func_rating}
            )
            prediction: int = np.argmax(predictions, axis=1)[0]

            app_logger.log_info(f"Article credibility rated: {prediction}", level="INFO")
            return str(prediction)

        except Exception as e:
            app_logger.log_error(f"Error rating article credibility: {e}")
            return "Error"
177
 
178
 
179
def extract_news_body(news_url: str) -> str:
    """
    Fetch a news article page and return its concatenated paragraph text.

    Retries up to three times on timeouts; all other failures return a
    human-readable error string rather than raising.

    Args:
        news_url (str): The URL of the news article.

    Returns:
        str: Extracted full article content, or an error message.
    """
    headers: Dict[str, str] = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
    }
    retries: int = 3
    for attempt_no in range(retries):
        try:
            page: requests.Response = requests.get(news_url, headers=headers, timeout=10)
            status: int = page.status_code
            if status == 403:
                app_logger.log_error(f"Access forbidden to article: {status}")
                return "Access forbidden to article."
            if status != 200:
                app_logger.log_error(f"Failed to fetch article: {status}")
                return "Failed to fetch article."

            parsed: BeautifulSoup = BeautifulSoup(page.text, "html.parser")
            # Keep only non-empty paragraph texts, one per line.
            texts: List[str] = [p.text.strip() for p in parsed.find_all("p")]
            body: str = "\n".join(t for t in texts if t)
            app_logger.log_info(f"Article content extracted from {news_url}", level="INFO")
            return body

        except requests.exceptions.Timeout:
            app_logger.log_warning(f"Timeout occurred while fetching article: {news_url}, attempt {attempt_no + 1}")
            if attempt_no < retries - 1:
                time.sleep(2)  # Wait before retrying
                continue
            return "Error: Timeout occurred while fetching article."

        except Exception as e:
            app_logger.log_error(f"Error extracting article content: {e}")
            return f"Error extracting article content: {e}"

    return "Failed to fetch article after multiple attempts."
223
 
224
async def invoke_duckduckgo_news_search(query: str, num: int = 3, location: str = "us-en", time_filter: str = "w") -> Dict[str, Any]:
    """
    Perform a news search on DuckDuckGo and return the results.

    Args:
        query (str): The search query.
        num (int): The number of results to return.
        location (str): The location filter for the search.
        time_filter (str): The time filter for the search.

    Returns:
        Dict[str, Any]: {"status": "success", "results": [...]} or
        {"status": "error", "message": ...}.
    """
    app_logger.log_info(f"Starting DuckDuckGo news search for query: {query}", level="INFO")

    chrome_options: Options = Options()
    chrome_options.add_argument("--headless")
    driver: webdriver.Chrome = webdriver.Chrome(options=chrome_options)

    # The driver is only needed to fetch the page source; quit it in a
    # finally block so an exception can never leak a headless Chrome process.
    try:
        duckduckgo_news_url: str = f"https://duckduckgo.com/html/?q={query.replace(' ', '+')}&kl={location}&df={time_filter}&ia=news"
        driver.get(duckduckgo_news_url)
        soup: BeautifulSoup = BeautifulSoup(driver.page_source, "html.parser")
    finally:
        driver.quit()

    search_results: List[BeautifulSoup] = soup.find_all("div", class_="result__body")

    def process_article(result: BeautifulSoup, index: int) -> Optional[Dict[str, Any]]:
        """
        Process a single search result and extract relevant information.

        Args:
            result (BeautifulSoup): The search result to process.
            index (int): The index of the search result.

        Returns:
            Optional[Dict[str, Any]]: Extracted info, or None on any failure.
        """
        try:
            title_tag: Optional[BeautifulSoup] = result.find("a", class_="result__a")
            if not title_tag:
                app_logger.log_warning(f"Title tag not found for result index {index}")
                return None

            title: str = title_tag.text.strip()
            raw_link: str = title_tag["href"]

            # DuckDuckGo wraps outbound links; recover the real URL from uddg=.
            match: Optional[re.Match] = re.search(r"uddg=(https?%3A%2F%2F[^&]+)", raw_link)
            link: str = urllib.parse.unquote(match.group(1)) if match else "Unknown Link"

            snippet_tag: Optional[BeautifulSoup] = result.find("a", class_="result__snippet")
            summary: str = snippet_tag.text.strip() if snippet_tag else "No summary available."

            article_content: str = extract_news_body(link)

            bot: ChatBot = ChatBot()

            # Rate the credibility of the article (asyncio.run is safe here:
            # each worker thread has no running event loop of its own).
            rating: str = asyncio.run(bot.rate_article_credibility(title, article_content))

            app_logger.log_info(f"Processed article: {title}", level="INFO")

            return {
                "num": index + 1,
                "link": link,
                "title": title,
                "summary": summary,
                "body": article_content,
                "rating": rating
            }

        except Exception as e:
            app_logger.log_error(f"Error processing article: {e}")
            return None

    with concurrent.futures.ThreadPoolExecutor() as executor:
        tasks: List[concurrent.futures.Future] = [
            executor.submit(process_article, result, index)
            for index, result in enumerate(search_results[:num])
        ]
        extracted_results: List[Optional[Dict[str, Any]]] = [
            task.result() for task in concurrent.futures.as_completed(tasks)
        ]

    extracted_results = [res for res in extracted_results if res is not None]

    if extracted_results:
        app_logger.log_info(f"News search completed successfully with {len(extracted_results)} results", level="INFO")
        return {"status": "success", "results": extracted_results}
    else:
        app_logger.log_error("No valid news search results found")
        return {"status": "error", "message": "No valid news search results found"}
314
-
315
def current_year() -> int:
    """Return the current calendar year as an integer."""
    today = datetime.now()
    return today.year
318
 
319
def save_to_audio(text: str) -> None:
    """Convert text to speech with gTTS and write it to ``output.mp3``.

    Failures are logged and swallowed; this function never raises.
    """
    try:
        speech: gTTS = gTTS(text=text, lang="en")
        speech.save("output.mp3")
        app_logger.log_info("Response converted to audio", level="INFO")
    except Exception as e:
        app_logger.log_error(f"Error converting response to audio: {e}")
 
17
  from huggingface_hub import hf_hub_download
18
  from keras.utils import pad_sequences
19
  from transformers import BertTokenizer
 
 
 
 
20
  from selenium import webdriver
21
  from selenium.webdriver.chrome.options import Options
22
  import concurrent.futures
23
 
class ChatBot:
    """Conversational wrapper around a local Llama model (via the ``ollama``
    CLI) plus async helpers that rate news articles."""

    def __init__(self) -> None:
        """Seed the conversation history with the system instruction."""
        self.history: List[Dict[str, str]] = [{"role": "system", "content": "You are a helpful assistant."}]

    def generate_response(self, prompt: str) -> str:
        """Append *prompt* to the history, query the model, and return its reply.

        Never raises: subprocess or model failures yield an apology string.
        """
        self.history.append({"role": "user", "content": prompt})
        # Flatten the chat history into "role: content" lines for stdin.
        transcript: str = "\n".join(f"{msg['role']}: {msg['content']}" for msg in self.history)
        try:
            result: subprocess.CompletedProcess = subprocess.run(
                ["ollama", "run", "llama3.2:latest"],
                input=transcript,
                capture_output=True,
                text=True,
            )
            if result.returncode != 0:
                return "I'm sorry, I encountered an issue processing your request."
            answer: str = result.stdout.strip()
            self.history.append({"role": "assistant", "content": answer})
            return answer
        except Exception:
            return "I'm sorry, an error occurred while processing your request."

    async def rate_body_of_article(self, article_title: str, article_content: str) -> str:
        """Ask the local model for a 1-5 whole-number quality rating.

        Returns the rating string, or "Error" on failure or an out-of-range
        / non-integer answer.
        """
        prompt: str = f"""
        Given the following article title and content, provide a rating between 1 and 5
        based on how well the content aligns with the title and its overall quality.

        - **Article Title**: {article_title}
        - **Article Content**: {article_content[:1000]}

        **Instructions:**
        - The rating should be a whole number between 1 and 5.
        - Base your score on accuracy, clarity, and relevance.
        - Only return a single numeric value (1-5) with no extra text.
        """
        try:
            result: subprocess.CompletedProcess = subprocess.run(
                ["ollama", "run", "llama3.2:latest"],
                input=prompt,
                capture_output=True,
                text=True,
            )
            if result.returncode != 0:
                return "Error"
            verdict: str = result.stdout.strip()
            # Accept only a bare whole number in [1, 5].
            if verdict.isdigit() and 1 <= int(verdict) <= 5:
                return verdict
            return "Error"
        except Exception:
            return "Error"

    async def rate_article_credibility(self, article_title: str, article_content: str) -> str:
        """Score an article's credibility with a pretrained Keras classifier.

        NOTE(review): only the title is tokenized and fed to the model;
        *article_content* is currently unused — confirm this is intentional.
        Returns the predicted class index as a string, or "Error".
        """
        try:
            model_path: str = hf_hub_download(repo_id="Dkethan/my-tf-nn-model-v2", filename="model.keras")
            credibility_model = keras.models.load_model(model_path)
            tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
            # Sequence length comes from the model's first input —
            # assumes input_shape[0] is the text branch; TODO confirm.
            max_length: int = credibility_model.input_shape[0][1]
            encoded = tokenizer(
                [article_title],
                max_length=max_length,
                padding="max_length",
                truncation=True,
                return_tensors="tf",
            )
            # Placeholder functional rating input for the second model branch.
            func_rating = np.array([5]).reshape(-1, 1)
            scores = credibility_model.predict(
                {"text_input": encoded["input_ids"], "func_rating_input": func_rating}
            )
            return str(np.argmax(scores, axis=1)[0])
        except Exception:
            return "Error"
91
 
92
 
def extract_news_body(news_url: str) -> str:
    """Fetch *news_url* and return the article text (all non-empty <p> tags).

    Retries up to three times on timeouts. All failures return a
    human-readable error string instead of raising, matching the callers'
    string-based contract.

    Args:
        news_url (str): The URL of the news article.

    Returns:
        str: Extracted article content, or an error message.
    """
    headers: Dict[str, str] = {"User-Agent": "Mozilla/5.0"}
    retries: int = 3
    for attempt in range(retries):
        try:
            response: requests.Response = requests.get(news_url, headers=headers, timeout=10)
            if response.status_code != 200:
                return "Failed to fetch article."
            soup: BeautifulSoup = BeautifulSoup(response.text, "html.parser")
            paragraphs: List[BeautifulSoup] = soup.find_all("p")
            return "\n".join([p.text.strip() for p in paragraphs if p.text.strip()])
        except requests.exceptions.Timeout:
            # Back off before retrying — but not after the final attempt.
            if attempt < retries - 1:
                time.sleep(2)
        except requests.exceptions.RequestException:
            # Connection/HTTP errors are not transient timeouts; fail fast
            # with the same string contract instead of propagating.
            return "Failed to fetch article."
    return "Failed to fetch article after multiple attempts."
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def current_year() -> int:
    """Return the current calendar year as an integer."""
    now = datetime.now()
    return now.year
110
 
def save_to_audio(text: str) -> None:
    """Convert *text* to speech with gTTS and write it to ``output.mp3``.

    Best-effort: audio generation failures must not crash the caller, but
    they should be visible rather than silently swallowed.

    Args:
        text (str): The text to synthesize.
    """
    try:
        tts: gTTS = gTTS(text=text, lang="en")
        tts.save("output.mp3")
    except Exception:
        # Previously `pass` — that hid every failure. Log via stdlib logging
        # (the project logger was removed in this revision) and keep the
        # no-raise contract.
        import logging
        logging.getLogger(__name__).exception("Failed to convert text to audio")