Deliverable3 / helper.py
Inara132000's picture
Update helper.py
d532801 verified
import asyncio
import concurrent.futures
import json
import logging
import os
import pickle
import re
import subprocess
import time
import urllib.parse
from datetime import datetime
from typing import Dict, List, Any, Optional

import httpx
import keras
import numpy as np
import requests
from bs4 import BeautifulSoup
from gtts import gTTS
from huggingface_hub import hf_hub_download
from keras.utils import pad_sequences
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from transformers import BertTokenizer
class ChatBot:
    """Chat front-end for a local Ollama model, plus two article-rating helpers.

    The conversation is kept in ``history`` as a list of
    ``{"role": ..., "content": ...}`` dicts, seeded with a system message.
    Model calls shell out to ``ollama run <model>`` and pass the prompt on
    stdin.

    Every public method is best-effort: failures are logged and reported to
    the caller as a fixed string rather than raised.
    """

    # Ollama model tag used when the constructor is not given one.
    DEFAULT_MODEL = "llama3.2:latest"

    # Class-level fallbacks so that instances created without running
    # __init__ (for example restored from an older pickle) still resolve
    # these attributes.
    model: str = DEFAULT_MODEL
    timeout: Optional[float] = None

    # The credibility model and tokenizer are cached on the class, not on the
    # instance: they are identical for every ChatBot and keeping them out of
    # the instance __dict__ leaves instances cheap to copy or pickle.
    _credibility_model: Any = None
    _credibility_tokenizer: Any = None

    _logger = logging.getLogger(__name__)

    def __init__(self, model: str = DEFAULT_MODEL, timeout: Optional[float] = None) -> None:
        """Create a bot with a fresh conversation history.

        Args:
            model: Ollama model tag passed to ``ollama run``.
            timeout: Seconds to wait for each ``ollama`` invocation, or
                ``None`` (the default) to wait indefinitely.
        """
        self.model = model
        self.timeout = timeout
        self.history: List[Dict[str, str]] = [{"role": "system", "content": "You are a helpful assistant."}]

    def _run_ollama(self, prompt: str) -> subprocess.CompletedProcess:
        """Run ``ollama run <model>`` with *prompt* on stdin and return the completed process."""
        return subprocess.run(
            ["ollama", "run", self.model],
            input=prompt,
            capture_output=True,
            text=True,
            timeout=self.timeout,
        )

    @staticmethod
    def _parse_rating(raw: str) -> str:
        """Return *raw* unchanged if it is a whole number from 1 to 5, else ``"Error"``."""
        if not raw.isdigit():
            return "Error"
        try:
            value = int(raw)
        except ValueError:
            # str.isdigit() accepts characters such as superscripts that
            # int() refuses to parse.
            return "Error"
        return raw if 1 <= value <= 5 else "Error"

    @classmethod
    def _load_credibility_assets(cls):
        """Return ``(model, tokenizer)`` for credibility scoring, loading each at most once."""
        if cls._credibility_model is None:
            model_path: str = hf_hub_download(repo_id="Dkethan/my-tf-nn-model-v2", filename="model.keras")
            cls._credibility_model = keras.models.load_model(model_path)
        if cls._credibility_tokenizer is None:
            cls._credibility_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        return cls._credibility_model, cls._credibility_tokenizer

    def generate_response(self, prompt: str) -> str:
        """Append *prompt* to the history, query the model, and return its reply.

        The whole history is flattened to ``"role: content"`` lines and sent
        as a single prompt. On success the reply is also appended to the
        history. On failure the user message stays in the history and an
        apology string is returned instead of raising.
        """
        self.history.append({"role": "user", "content": prompt})
        conversation: str = "\n".join(f"{msg['role']}: {msg['content']}" for msg in self.history)
        try:
            completion = self._run_ollama(conversation)
        except Exception:
            self._logger.exception("ollama invocation failed")
            return "I'm sorry, an error occurred while processing your request."
        if completion.returncode != 0:
            self._logger.warning(
                "ollama exited with status %s: %s", completion.returncode, completion.stderr.strip()
            )
            return "I'm sorry, I encountered an issue processing your request."
        response: str = completion.stdout.strip()
        self.history.append({"role": "assistant", "content": response})
        return response

    async def rate_body_of_article(self, article_title: str, article_content: str) -> str:
        """Ask the model to rate an article body from 1 to 5.

        Only the first 1000 characters of *article_content* are sent. The
        blocking ``ollama`` call runs in the default executor so the event
        loop stays responsive while the model is generating.

        Returns:
            The rating as a string (``"1"``..``"5"``), or ``"Error"`` if the
            model call fails or the reply is not a bare number in that range.
        """
        prompt: str = (
            "\n"
            "Given the following article title and content, provide a rating between 1 and 5\n"
            "based on how well the content aligns with the title and its overall quality.\n"
            f"- **Article Title**: {article_title}\n"
            f"- **Article Content**: {article_content[:1000]}\n"
            "**Instructions:**\n"
            "- The rating should be a whole number between 1 and 5.\n"
            "- Base your score on accuracy, clarity, and relevance.\n"
            "- Only return a single numeric value (1-5) with no extra text.\n"
        )
        try:
            loop = asyncio.get_running_loop()
            completion = await loop.run_in_executor(None, self._run_ollama, prompt)
        except Exception:
            self._logger.exception("ollama invocation failed while rating article body")
            return "Error"
        if completion.returncode != 0:
            self._logger.warning(
                "ollama exited with status %s: %s", completion.returncode, completion.stderr.strip()
            )
            return "Error"
        return self._parse_rating(completion.stdout.strip())

    async def rate_article_credibility(self, article_title: str, article_content: str) -> str:
        """Classify an article's credibility with the pretrained Keras model.

        Only *article_title* is fed to the model; *article_content* is
        accepted but not used, and the ``func_rating_input`` feature is fixed
        at 5. Model inference runs synchronously inside this coroutine.

        Returns:
            The index of the highest-scoring class as a string, or
            ``"Error"`` if loading or inference fails.
        """
        try:
            model, tokenizer = self._load_credibility_assets()
            max_length: int = model.input_shape[0][1]
            X_text = tokenizer(
                [article_title],
                max_length=max_length,
                padding="max_length",
                truncation=True,
                return_tensors="tf",
            )
            X_func_rating: np.ndarray = np.array([5]).reshape(-1, 1)
            predictions: np.ndarray = model.predict(
                {"text_input": X_text["input_ids"], "func_rating_input": X_func_rating}
            )
            return str(np.argmax(predictions, axis=1)[0])
        except Exception:
            self._logger.exception("credibility model failed")
            return "Error"
def extract_news_body(
    news_url: str,
    *,
    retries: int = 3,
    timeout: float = 10,
    retry_delay: float = 2,
) -> str:
    """Download *news_url* and return the text of its ``<p>`` elements.

    Args:
        news_url: Address of the article page.
        retries: Total number of attempts made when requests time out.
        timeout: Per-request timeout in seconds.
        retry_delay: Seconds to wait between timed-out attempts.

    Returns:
        The non-empty paragraph texts joined by newlines. If the server
        answers with a status other than 200, ``"Failed to fetch article."``
        is returned immediately. If every attempt times out,
        ``"Failed to fetch article after multiple attempts."`` is returned.

    Only ``requests.exceptions.Timeout`` is retried; any other exception
    raised by ``requests`` propagates to the caller.
    """
    headers: Dict[str, str] = {"User-Agent": "Mozilla/5.0"}
    for attempt in range(1, retries + 1):
        try:
            response: requests.Response = requests.get(news_url, headers=headers, timeout=timeout)
        except requests.exceptions.Timeout:
            # Pause only when another attempt will follow; sleeping after the
            # last one would just delay the failure result.
            if attempt < retries:
                time.sleep(retry_delay)
            continue
        if response.status_code != 200:
            return "Failed to fetch article."
        soup: BeautifulSoup = BeautifulSoup(response.text, "html.parser")
        stripped_texts = (p.text.strip() for p in soup.find_all("p"))
        return "\n".join(text for text in stripped_texts if text)
    return "Failed to fetch article after multiple attempts."
def current_year() -> int:
    """Return the current calendar year according to the local system clock."""
    today = datetime.today()
    return today.year
def save_to_audio(text: str, output_path: str = "output.mp3", lang: str = "en") -> None:
    """Synthesize *text* to speech with gTTS and write it to *output_path*.

    Args:
        text: The text to speak.
        output_path: Destination file for the generated audio.
        lang: Language code handed to gTTS.

    This is best-effort: any failure is logged with its traceback and the
    function returns normally without raising.
    """
    try:
        tts: gTTS = gTTS(text=text, lang=lang)
        tts.save(output_path)
    except Exception:
        logging.getLogger(__name__).exception("Could not save speech audio to %s", output_path)