Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,6 +22,8 @@ from dotenv import load_dotenv
|
|
| 22 |
import certifi
|
| 23 |
from bs4 import BeautifulSoup
|
| 24 |
import requests
|
|
|
|
|
|
|
| 25 |
|
| 26 |
# Load environment variables from a .env file
|
| 27 |
load_dotenv()
|
|
@@ -37,7 +39,7 @@ SEARXNG_KEY = 'f9f07f93b37b8483aadb5ba717f556f3a4ac507b281b4ca01e6c6288aa3e3ae5'
|
|
| 37 |
# Use the environment variable
|
| 38 |
HF_TOKEN = os.getenv('HF_TOKEN')
|
| 39 |
client = InferenceClient(
|
| 40 |
-
"
|
| 41 |
token=HF_TOKEN,
|
| 42 |
)
|
| 43 |
|
|
@@ -253,7 +255,7 @@ Remember to focus on financial aspects and implications in your assessment and s
|
|
| 253 |
logger.error(f"Error assessing relevance and summarizing with LLM: {e}")
|
| 254 |
return "Error: Unable to assess relevance and summarize"
|
| 255 |
|
| 256 |
-
def scrape_full_content(url, scraper="trafilatura", max_chars=3000, timeout=5):
|
| 257 |
try:
|
| 258 |
logger.info(f"Scraping full content from: {url}")
|
| 259 |
|
|
@@ -311,7 +313,7 @@ Your response should be detailed, informative, accurate, and directly relevant t
|
|
| 311 |
try:
|
| 312 |
response = llm_client.chat_completion(
|
| 313 |
messages=messages,
|
| 314 |
-
max_tokens=
|
| 315 |
temperature=temperature
|
| 316 |
)
|
| 317 |
return response.choices[0].message.content.strip()
|
|
@@ -319,12 +321,7 @@ Your response should be detailed, informative, accurate, and directly relevant t
|
|
| 319 |
logger.error(f"Error in LLM summarization: {e}")
|
| 320 |
return "Error: Unable to generate a summary. Please try again."
|
| 321 |
|
| 322 |
-
|
| 323 |
-
from trafilatura import extract
|
| 324 |
-
from trafilatura.settings import use_config
|
| 325 |
-
from urllib.request import urlopen, Request
|
| 326 |
-
|
| 327 |
-
def search_and_scrape(query, chat_history, num_results=5, scraper="trafilatura", max_chars=3000, time_range="", language="all", category="",
|
| 328 |
engines=[], safesearch=2, method="GET", llm_temperature=0.2, timeout=5):
|
| 329 |
try:
|
| 330 |
# Step 1: Rephrase the Query
|
|
@@ -561,7 +558,7 @@ iface = gr.ChatInterface(
|
|
| 561 |
description="Enter your query, and I'll search the web for the most recent and relevant financial news, scrape content, and provide summarized results.",
|
| 562 |
additional_inputs=[
|
| 563 |
gr.Slider(5, 20, value=10, step=1, label="Number of initial results"),
|
| 564 |
-
gr.Dropdown(["bs4", "trafilatura"], value="trafilatura", label="Scraping Method"),
|
| 565 |
gr.Slider(500, 10000, value=1500, step=100, label="Max characters to retrieve"),
|
| 566 |
gr.Dropdown(["", "day", "week", "month", "year"], value="year", label="Time Range"),
|
| 567 |
gr.Dropdown(["all", "en", "fr", "de", "es", "it", "nl", "pt", "pl", "ru", "zh"], value="en", label="Language"),
|
|
|
|
| 22 |
import certifi
|
| 23 |
from bs4 import BeautifulSoup
|
| 24 |
import requests
|
| 25 |
+
from trafilatura.settings import use_config
|
| 26 |
+
from urllib.request import urlopen, Request
|
| 27 |
|
| 28 |
# Load environment variables from a .env file
|
| 29 |
load_dotenv()
|
|
|
|
| 39 |
# Use the environment variable
|
| 40 |
HF_TOKEN = os.getenv('HF_TOKEN')
|
| 41 |
client = InferenceClient(
|
| 42 |
+
"mistralai/Mistral-Nemo-Instruct-2407",
|
| 43 |
token=HF_TOKEN,
|
| 44 |
)
|
| 45 |
|
|
|
|
| 255 |
logger.error(f"Error assessing relevance and summarizing with LLM: {e}")
|
| 256 |
return "Error: Unable to assess relevance and summarize"
|
| 257 |
|
| 258 |
+
def scrape_full_content(url, scraper="bs4", max_chars=3000, timeout=5):
|
| 259 |
try:
|
| 260 |
logger.info(f"Scraping full content from: {url}")
|
| 261 |
|
|
|
|
| 313 |
try:
|
| 314 |
response = llm_client.chat_completion(
|
| 315 |
messages=messages,
|
| 316 |
+
max_tokens=10000,
|
| 317 |
temperature=temperature
|
| 318 |
)
|
| 319 |
return response.choices[0].message.content.strip()
|
|
|
|
| 321 |
logger.error(f"Error in LLM summarization: {e}")
|
| 322 |
return "Error: Unable to generate a summary. Please try again."
|
| 323 |
|
| 324 |
+
def search_and_scrape(query, chat_history, num_results=5, scraper="bs4", max_chars=3000, time_range="", language="all", category="",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
engines=[], safesearch=2, method="GET", llm_temperature=0.2, timeout=5):
|
| 326 |
try:
|
| 327 |
# Step 1: Rephrase the Query
|
|
|
|
| 558 |
description="Enter your query, and I'll search the web for the most recent and relevant financial news, scrape content, and provide summarized results.",
|
| 559 |
additional_inputs=[
|
| 560 |
gr.Slider(5, 20, value=10, step=1, label="Number of initial results"),
|
| 561 |
+
gr.Dropdown(["bs4", "trafilatura"], value="bs4", label="Scraping Method"),
|
| 562 |
gr.Slider(500, 10000, value=1500, step=100, label="Max characters to retrieve"),
|
| 563 |
gr.Dropdown(["", "day", "week", "month", "year"], value="year", label="Time Range"),
|
| 564 |
gr.Dropdown(["all", "en", "fr", "de", "es", "it", "nl", "pt", "pl", "ru", "zh"], value="en", label="Language"),
|