Update app.py
Browse files
app.py
CHANGED
|
@@ -8,18 +8,17 @@ import numpy as np
|
|
| 8 |
import requests
|
| 9 |
import time
|
| 10 |
import re
|
| 11 |
-
import base64
|
| 12 |
import logging
|
| 13 |
import os
|
| 14 |
import sys
|
| 15 |
-
import concurrent.futures
|
| 16 |
from concurrent.futures import ThreadPoolExecutor
|
| 17 |
import threading
|
|
|
|
| 18 |
|
| 19 |
# Import OpenAI library
|
| 20 |
import openai
|
| 21 |
|
| 22 |
-
# Suppress
|
| 23 |
import urllib3
|
| 24 |
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
| 25 |
|
|
@@ -74,14 +73,15 @@ CATEGORIES = [
|
|
| 74 |
"Uncategorized",
|
| 75 |
]
|
| 76 |
|
| 77 |
-
# Set up
|
| 78 |
-
|
| 79 |
|
| 80 |
-
if not
|
| 81 |
-
logger.error("
|
| 82 |
|
| 83 |
-
openai.api_key =
|
| 84 |
-
|
|
|
|
| 85 |
|
| 86 |
# Initialize global variables for rate limiting
|
| 87 |
api_lock = threading.Lock()
|
|
@@ -178,7 +178,7 @@ def generate_summary_and_assign_category(bookmark):
|
|
| 178 |
time.sleep(sleep_duration)
|
| 179 |
last_api_call_time = time.time()
|
| 180 |
|
| 181 |
-
#
|
| 182 |
html_content = bookmark.get('html_content', '')
|
| 183 |
soup = BeautifulSoup(html_content, 'html.parser')
|
| 184 |
metadata = get_page_metadata(soup)
|
|
@@ -208,7 +208,7 @@ def generate_summary_and_assign_category(bookmark):
|
|
| 208 |
else:
|
| 209 |
use_prior_knowledge = False
|
| 210 |
|
| 211 |
-
#
|
| 212 |
if use_prior_knowledge:
|
| 213 |
prompt = f"""
|
| 214 |
You are a knowledgeable assistant with up-to-date information as of 2023.
|
|
@@ -237,27 +237,13 @@ Summary: [Your summary]
|
|
| 237 |
Category: [One category]
|
| 238 |
"""
|
| 239 |
|
| 240 |
-
#
|
| 241 |
-
def estimate_tokens(text):
|
| 242 |
-
return len(text) / 4 # Approximate token estimation
|
| 243 |
-
|
| 244 |
-
prompt_tokens = estimate_tokens(prompt)
|
| 245 |
-
max_tokens = 150 # Adjusted from 200
|
| 246 |
-
total_tokens = prompt_tokens + max_tokens
|
| 247 |
-
|
| 248 |
-
# Calculate required delay
|
| 249 |
-
tokens_per_minute = 40000
|
| 250 |
-
tokens_per_second = tokens_per_minute / 60
|
| 251 |
-
required_delay = total_tokens / tokens_per_second
|
| 252 |
-
sleep_time = max(required_delay, 2) # Ensure at least 2 seconds
|
| 253 |
-
|
| 254 |
-
# Call the LLM via Groq Cloud API
|
| 255 |
response = openai.ChatCompletion.create(
|
| 256 |
-
model='
|
| 257 |
messages=[
|
| 258 |
{"role": "user", "content": prompt}
|
| 259 |
],
|
| 260 |
-
max_tokens=
|
| 261 |
temperature=0.5,
|
| 262 |
)
|
| 263 |
|
|
@@ -283,7 +269,7 @@ Category: [One category]
|
|
| 283 |
else:
|
| 284 |
bookmark['category'] = 'Uncategorized'
|
| 285 |
|
| 286 |
-
# Simple keyword-based validation
|
| 287 |
summary_lower = bookmark['summary'].lower()
|
| 288 |
url_lower = bookmark['url'].lower()
|
| 289 |
if 'social media' in summary_lower or 'twitter' in summary_lower or 'x.com' in url_lower:
|
|
@@ -292,7 +278,6 @@ Category: [One category]
|
|
| 292 |
bookmark['category'] = 'Reference and Knowledge Bases'
|
| 293 |
|
| 294 |
logger.info("Successfully generated summary and assigned category")
|
| 295 |
-
time.sleep(sleep_time)
|
| 296 |
break # Exit the retry loop upon success
|
| 297 |
|
| 298 |
except openai.error.RateLimitError as e:
|
|
@@ -439,7 +424,6 @@ def display_bookmarks():
|
|
| 439 |
category = bookmark.get('category', 'Uncategorized')
|
| 440 |
|
| 441 |
# Escape HTML content to prevent XSS attacks
|
| 442 |
-
from html import escape
|
| 443 |
title = escape(title)
|
| 444 |
url = escape(url)
|
| 445 |
summary = escape(summary)
|
|
@@ -493,12 +477,12 @@ def process_uploaded_file(file, state_bookmarks):
|
|
| 493 |
|
| 494 |
# Fetch bookmark info concurrently
|
| 495 |
logger.info("Fetching URL info concurrently")
|
| 496 |
-
with ThreadPoolExecutor(max_workers=10) as executor: #
|
| 497 |
executor.map(fetch_url_info, bookmarks)
|
| 498 |
|
| 499 |
# Process bookmarks with LLM calls (single worker, so the calls run one at a time)
|
| 500 |
logger.info("Processing bookmarks with LLM concurrently")
|
| 501 |
-
with ThreadPoolExecutor(max_workers=1) as executor: #
|
| 502 |
executor.map(generate_summary_and_assign_category, bookmarks)
|
| 503 |
|
| 504 |
try:
|
|
@@ -531,12 +515,15 @@ def delete_selected_bookmarks(selected_indices, state_bookmarks):
|
|
| 531 |
ids_to_delete = []
|
| 532 |
indices_to_delete = []
|
| 533 |
for s in selected_indices:
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
|
|
|
|
|
|
|
|
|
| 540 |
|
| 541 |
# Remove vectors from FAISS index
|
| 542 |
if faiss_index is not None and ids_to_delete:
|
|
@@ -565,11 +552,20 @@ def edit_selected_bookmarks_category(selected_indices, new_category, state_bookm
|
|
| 565 |
if not new_category:
|
| 566 |
return "⚠️ No new category selected.", gr.CheckboxGroup.update(choices=[]), display_bookmarks(), state_bookmarks
|
| 567 |
|
| 568 |
-
indices = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 569 |
for idx in indices:
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
logger.info(f"Updated category for bookmark {idx + 1} to {new_category}")
|
| 573 |
|
| 574 |
message = "✏️ Category updated for selected bookmarks."
|
| 575 |
logger.info(message)
|
|
@@ -589,7 +585,7 @@ def export_bookmarks():
|
|
| 589 |
"""
|
| 590 |
if not bookmarks:
|
| 591 |
logger.warning("No bookmarks to export")
|
| 592 |
-
return None # Return None
|
| 593 |
|
| 594 |
try:
|
| 595 |
logger.info("Exporting bookmarks to HTML")
|
|
@@ -639,7 +635,7 @@ def chatbot_response(user_query, chat_history):
|
|
| 639 |
time.sleep(sleep_duration)
|
| 640 |
last_api_call_time = time.time()
|
| 641 |
|
| 642 |
-
#
|
| 643 |
query_vector = embedding_model.encode([user_query]).astype('float32')
|
| 644 |
k = 5 # Number of results to return
|
| 645 |
distances, ids = faiss_index.search(query_vector, k)
|
|
@@ -660,7 +656,7 @@ def chatbot_response(user_query, chat_history):
|
|
| 660 |
for bookmark in matching_bookmarks
|
| 661 |
])
|
| 662 |
|
| 663 |
-
#
|
| 664 |
prompt = f"""
|
| 665 |
A user asked: "{user_query}"
|
| 666 |
Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
|
|
@@ -669,33 +665,18 @@ Bookmarks:
|
|
| 669 |
Provide a concise and helpful response.
|
| 670 |
"""
|
| 671 |
|
| 672 |
-
#
|
| 673 |
-
def estimate_tokens(text):
|
| 674 |
-
return len(text) / 4 # Approximate token estimation
|
| 675 |
-
|
| 676 |
-
prompt_tokens = estimate_tokens(prompt)
|
| 677 |
-
max_tokens = 300 # Adjust as needed
|
| 678 |
-
total_tokens = prompt_tokens + max_tokens
|
| 679 |
-
|
| 680 |
-
# Calculate required delay
|
| 681 |
-
tokens_per_minute = 40000
|
| 682 |
-
tokens_per_second = tokens_per_minute / 60
|
| 683 |
-
required_delay = total_tokens / tokens_per_second
|
| 684 |
-
sleep_time = max(required_delay, 2) # Ensure at least 2 seconds
|
| 685 |
-
|
| 686 |
-
# Call the LLM via Groq Cloud API
|
| 687 |
response = openai.ChatCompletion.create(
|
| 688 |
-
model='
|
| 689 |
messages=[
|
| 690 |
{"role": "user", "content": prompt}
|
| 691 |
],
|
| 692 |
-
max_tokens=
|
| 693 |
temperature=0.7,
|
| 694 |
)
|
| 695 |
|
| 696 |
answer = response['choices'][0]['message']['content'].strip()
|
| 697 |
logger.info("Chatbot response generated")
|
| 698 |
-
time.sleep(sleep_time)
|
| 699 |
|
| 700 |
# Append the assistant's response to chat history
|
| 701 |
chat_history.append({"role": "assistant", "content": answer})
|
|
|
|
| 8 |
import requests
|
| 9 |
import time
|
| 10 |
import re
|
|
|
|
| 11 |
import logging
|
| 12 |
import os
|
| 13 |
import sys
|
|
|
|
| 14 |
from concurrent.futures import ThreadPoolExecutor
|
| 15 |
import threading
|
| 16 |
+
from html import escape
|
| 17 |
|
| 18 |
# Import OpenAI library
|
| 19 |
import openai
|
| 20 |
|
| 21 |
+
# Suppress specific warnings
|
| 22 |
import urllib3
|
| 23 |
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
| 24 |
|
|
|
|
| 73 |
"Uncategorized",
|
| 74 |
]
|
| 75 |
|
| 76 |
+
# Set up OpenAI API key and base URL
|
| 77 |
+
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
|
| 78 |
|
| 79 |
+
if not OPENAI_API_KEY:
|
| 80 |
+
logger.error("OPENAI_API_KEY environment variable not set.")
|
| 81 |
|
| 82 |
+
openai.api_key = OPENAI_API_KEY
|
| 83 |
+
# If you're using a custom API base, uncomment and set it
|
| 84 |
+
# openai.api_base = "https://api.your-provider.com/v1"
|
| 85 |
|
| 86 |
# Initialize global variables for rate limiting
|
| 87 |
api_lock = threading.Lock()
|
|
|
|
| 178 |
time.sleep(sleep_duration)
|
| 179 |
last_api_call_time = time.time()
|
| 180 |
|
| 181 |
+
# Prepare the prompt
|
| 182 |
html_content = bookmark.get('html_content', '')
|
| 183 |
soup = BeautifulSoup(html_content, 'html.parser')
|
| 184 |
metadata = get_page_metadata(soup)
|
|
|
|
| 208 |
else:
|
| 209 |
use_prior_knowledge = False
|
| 210 |
|
| 211 |
+
# Craft the prompt based on content availability
|
| 212 |
if use_prior_knowledge:
|
| 213 |
prompt = f"""
|
| 214 |
You are a knowledgeable assistant with up-to-date information as of 2023.
|
|
|
|
| 237 |
Category: [One category]
|
| 238 |
"""
|
| 239 |
|
| 240 |
+
# Call the LLM via OpenAI API
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
response = openai.ChatCompletion.create(
|
| 242 |
+
model='gpt-4', # Ensure you're using a valid and accessible model
|
| 243 |
messages=[
|
| 244 |
{"role": "user", "content": prompt}
|
| 245 |
],
|
| 246 |
+
max_tokens=150,
|
| 247 |
temperature=0.5,
|
| 248 |
)
|
| 249 |
|
|
|
|
| 269 |
else:
|
| 270 |
bookmark['category'] = 'Uncategorized'
|
| 271 |
|
| 272 |
+
# Optional: Simple keyword-based validation
|
| 273 |
summary_lower = bookmark['summary'].lower()
|
| 274 |
url_lower = bookmark['url'].lower()
|
| 275 |
if 'social media' in summary_lower or 'twitter' in summary_lower or 'x.com' in url_lower:
|
|
|
|
| 278 |
bookmark['category'] = 'Reference and Knowledge Bases'
|
| 279 |
|
| 280 |
logger.info("Successfully generated summary and assigned category")
|
|
|
|
| 281 |
break # Exit the retry loop upon success
|
| 282 |
|
| 283 |
except openai.error.RateLimitError as e:
|
|
|
|
| 424 |
category = bookmark.get('category', 'Uncategorized')
|
| 425 |
|
| 426 |
# Escape HTML content to prevent XSS attacks
|
|
|
|
| 427 |
title = escape(title)
|
| 428 |
url = escape(url)
|
| 429 |
summary = escape(summary)
|
|
|
|
| 477 |
|
| 478 |
# Fetch bookmark info concurrently
|
| 479 |
logger.info("Fetching URL info concurrently")
|
| 480 |
+
with ThreadPoolExecutor(max_workers=10) as executor: # Adjust max_workers as needed
|
| 481 |
executor.map(fetch_url_info, bookmarks)
|
| 482 |
|
| 483 |
# Process bookmarks with LLM calls (single worker, so the calls run one at a time)
|
| 484 |
logger.info("Processing bookmarks with LLM concurrently")
|
| 485 |
+
with ThreadPoolExecutor(max_workers=1) as executor: # Serialize API calls to respect rate limits
|
| 486 |
executor.map(generate_summary_and_assign_category, bookmarks)
|
| 487 |
|
| 488 |
try:
|
|
|
|
| 515 |
ids_to_delete = []
|
| 516 |
indices_to_delete = []
|
| 517 |
for s in selected_indices:
|
| 518 |
+
try:
|
| 519 |
+
idx = int(s.split('.')[0]) - 1
|
| 520 |
+
if 0 <= idx < len(bookmarks):
|
| 521 |
+
bookmark_id = bookmarks[idx]['id']
|
| 522 |
+
ids_to_delete.append(bookmark_id)
|
| 523 |
+
indices_to_delete.append(idx)
|
| 524 |
+
logger.info(f"Deleting bookmark at index {idx + 1}")
|
| 525 |
+
except (ValueError, IndexError):
|
| 526 |
+
logger.warning(f"Invalid selection format: {s}")
|
| 527 |
|
| 528 |
# Remove vectors from FAISS index
|
| 529 |
if faiss_index is not None and ids_to_delete:
|
|
|
|
| 552 |
if not new_category:
|
| 553 |
return "⚠️ No new category selected.", gr.CheckboxGroup.update(choices=[]), display_bookmarks(), state_bookmarks
|
| 554 |
|
| 555 |
+
indices = []
|
| 556 |
+
for s in selected_indices:
|
| 557 |
+
try:
|
| 558 |
+
idx = int(s.split('.')[0])-1
|
| 559 |
+
if 0 <= idx < len(bookmarks):
|
| 560 |
+
indices.append(idx)
|
| 561 |
+
else:
|
| 562 |
+
logger.warning(f"Index out of range: {idx + 1}")
|
| 563 |
+
except ValueError:
|
| 564 |
+
logger.warning(f"Invalid selection format: {s}")
|
| 565 |
+
|
| 566 |
for idx in indices:
|
| 567 |
+
bookmarks[idx]['category'] = new_category
|
| 568 |
+
logger.info(f"Updated category for bookmark {idx + 1} to {new_category}")
|
|
|
|
| 569 |
|
| 570 |
message = "✏️ Category updated for selected bookmarks."
|
| 571 |
logger.info(message)
|
|
|
|
| 585 |
"""
|
| 586 |
if not bookmarks:
|
| 587 |
logger.warning("No bookmarks to export")
|
| 588 |
+
return None # Return None to indicate no file
|
| 589 |
|
| 590 |
try:
|
| 591 |
logger.info("Exporting bookmarks to HTML")
|
|
|
|
| 635 |
time.sleep(sleep_duration)
|
| 636 |
last_api_call_time = time.time()
|
| 637 |
|
| 638 |
+
# Encode the query and search the FAISS index
|
| 639 |
query_vector = embedding_model.encode([user_query]).astype('float32')
|
| 640 |
k = 5 # Number of results to return
|
| 641 |
distances, ids = faiss_index.search(query_vector, k)
|
|
|
|
| 656 |
for bookmark in matching_bookmarks
|
| 657 |
])
|
| 658 |
|
| 659 |
+
# Craft the prompt for the LLM
|
| 660 |
prompt = f"""
|
| 661 |
A user asked: "{user_query}"
|
| 662 |
Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
|
|
|
|
| 665 |
Provide a concise and helpful response.
|
| 666 |
"""
|
| 667 |
|
| 668 |
+
# Call the LLM via OpenAI API
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 669 |
response = openai.ChatCompletion.create(
|
| 670 |
+
model='gpt-4', # Ensure you're using a valid and accessible model
|
| 671 |
messages=[
|
| 672 |
{"role": "user", "content": prompt}
|
| 673 |
],
|
| 674 |
+
max_tokens=300,
|
| 675 |
temperature=0.7,
|
| 676 |
)
|
| 677 |
|
| 678 |
answer = response['choices'][0]['message']['content'].strip()
|
| 679 |
logger.info("Chatbot response generated")
|
|
|
|
| 680 |
|
| 681 |
# Append the assistant's response to chat history
|
| 682 |
chat_history.append({"role": "assistant", "content": answer})
|