Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ import faiss
|
|
| 8 |
import numpy as np
|
| 9 |
import asyncio
|
| 10 |
import aiohttp
|
|
|
|
| 11 |
|
| 12 |
# Initialize models and variables
|
| 13 |
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
@@ -15,6 +16,30 @@ faiss_index = None
|
|
| 15 |
bookmarks = []
|
| 16 |
fetch_cache = {}
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
def parse_bookmarks(file_content):
|
| 19 |
soup = BeautifulSoup(file_content, 'html.parser')
|
| 20 |
extracted_bookmarks = []
|
|
@@ -89,6 +114,45 @@ def generate_summary(bookmark):
|
|
| 89 |
bookmark['summary'] = 'No summary available.'
|
| 90 |
return bookmark
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
def vectorize_and_index(bookmarks):
|
| 93 |
summaries = [bookmark['summary'] for bookmark in bookmarks]
|
| 94 |
embeddings = embedding_model.encode(summaries)
|
|
@@ -106,6 +170,7 @@ def display_bookmarks():
|
|
| 106 |
url = bookmark['url']
|
| 107 |
etag = bookmark.get('etag', 'N/A')
|
| 108 |
summary = bookmark.get('summary', '')
|
|
|
|
| 109 |
|
| 110 |
# Apply inline styles for dead links
|
| 111 |
if bookmark.get('dead_link'):
|
|
@@ -119,6 +184,7 @@ def display_bookmarks():
|
|
| 119 |
<div class="card" style="{card_style}">
|
| 120 |
<div class="card-content">
|
| 121 |
<h3 style="{text_style}">{index}. {title}</h3>
|
|
|
|
| 122 |
<p style="{text_style}"><strong>URL:</strong> <a href="{url}" target="_blank" style="{text_style}">{url}</a></p>
|
| 123 |
<p style="{text_style}"><strong>Status:</strong> {status}</p>
|
| 124 |
<p style="{text_style}"><strong>ETag:</strong> {etag}</p>
|
|
@@ -146,9 +212,10 @@ def process_uploaded_file(file):
|
|
| 146 |
# Asynchronously fetch bookmark info
|
| 147 |
asyncio.run(process_bookmarks_async(bookmarks))
|
| 148 |
|
| 149 |
-
# Generate summaries
|
| 150 |
for bookmark in bookmarks:
|
| 151 |
generate_summary(bookmark)
|
|
|
|
| 152 |
|
| 153 |
faiss_index, embeddings = vectorize_and_index(bookmarks)
|
| 154 |
message = f"Successfully processed {len(bookmarks)} bookmarks."
|
|
@@ -169,7 +236,7 @@ def chatbot_response(user_query):
|
|
| 169 |
if idx < len(bookmarks):
|
| 170 |
bookmark = bookmarks[idx]
|
| 171 |
index = bookmarks.index(bookmark) + 1 # Start index at 1
|
| 172 |
-
response += f"{index}. Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}\n\n"
|
| 173 |
return response.strip()
|
| 174 |
|
| 175 |
def edit_bookmark(bookmark_idx, new_title, new_url):
|
|
@@ -183,6 +250,7 @@ def edit_bookmark(bookmark_idx, new_title, new_url):
|
|
| 183 |
# Re-fetch bookmark info
|
| 184 |
asyncio.run(process_bookmarks_async([bookmarks[bookmark_idx]]))
|
| 185 |
generate_summary(bookmarks[bookmark_idx])
|
|
|
|
| 186 |
# Rebuild the FAISS index
|
| 187 |
faiss_index, embeddings = vectorize_and_index(bookmarks)
|
| 188 |
message = "Bookmark updated successfully."
|
|
|
|
| 8 |
import numpy as np
|
| 9 |
import asyncio
|
| 10 |
import aiohttp
|
| 11 |
+
import re
|
| 12 |
|
| 13 |
# Initialize models and variables
|
| 14 |
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
|
|
| 16 |
bookmarks = []
|
| 17 |
fetch_cache = {}
|
| 18 |
|
| 19 |
+
# Names of the bookmark categories (20 labels, order preserved).
CATEGORIES = [
    "Social Media", "News and Media",
    "Education and Learning", "Entertainment",
    "Shopping and E-commerce", "Finance and Banking",
    "Technology", "Health and Fitness",
    "Travel and Tourism", "Food and Recipes",
    "Sports", "Arts and Culture",
    "Government and Politics", "Business and Economy",
    "Science and Research", "Personal Blogs and Journals",
    "Job Search and Careers", "Music and Audio",
    "Videos and Movies", "Reference and Knowledge Bases",
]
|
| 42 |
+
|
| 43 |
def parse_bookmarks(file_content):
|
| 44 |
soup = BeautifulSoup(file_content, 'html.parser')
|
| 45 |
extracted_bookmarks = []
|
|
|
|
| 114 |
bookmark['summary'] = 'No summary available.'
|
| 115 |
return bookmark
|
| 116 |
|
| 117 |
+
# Keywords associated with each category. Order matters: categories are
# tried top to bottom and the first one with a matching keyword wins, so a
# keyword listed under two categories ("movies", "economy", "study") always
# resolves to the earlier one.
_CATEGORY_KEYWORDS = {
    "Social Media": ["social media", "networking", "friends", "connect", "posts", "profile"],
    "News and Media": ["news", "journalism", "media", "headlines", "breaking news"],
    "Education and Learning": ["education", "learning", "courses", "tutorial", "university", "academy", "study"],
    "Entertainment": ["entertainment", "movies", "tv shows", "games", "comics", "fun"],
    "Shopping and E-commerce": ["shopping", "e-commerce", "buy", "sell", "marketplace", "deals", "store"],
    "Finance and Banking": ["finance", "banking", "investment", "money", "economy", "stock", "trading"],
    "Technology": ["technology", "tech", "gadgets", "software", "computers", "innovation"],
    "Health and Fitness": ["health", "fitness", "medical", "wellness", "exercise", "diet"],
    "Travel and Tourism": ["travel", "tourism", "destinations", "hotels", "flights", "vacation"],
    "Food and Recipes": ["food", "recipes", "cooking", "cuisine", "restaurant", "dining"],
    "Sports": ["sports", "scores", "teams", "athletics", "matches", "leagues"],
    "Arts and Culture": ["arts", "culture", "museum", "gallery", "exhibition", "artistic"],
    "Government and Politics": ["government", "politics", "policy", "election", "public service"],
    "Business and Economy": ["business", "corporate", "industry", "economy", "markets"],
    "Science and Research": ["science", "research", "experiment", "laboratory", "study", "scientific"],
    "Personal Blogs and Journals": ["blog", "journal", "personal", "diary", "thoughts", "opinions"],
    "Job Search and Careers": ["jobs", "careers", "recruitment", "resume", "employment", "hiring"],
    "Music and Audio": ["music", "audio", "songs", "albums", "artists", "bands"],
    "Videos and Movies": ["video", "movies", "film", "clips", "trailers", "cinema"],
    "Reference and Knowledge Bases": ["reference", "encyclopedia", "dictionary", "wiki", "knowledge", "information"],
}

# One precompiled whole-word pattern per category, built once at import time
# instead of recompiling a regex per keyword on every call.
_CATEGORY_PATTERNS = [
    (
        category,
        re.compile(r'\b(?:' + '|'.join(re.escape(keyword) for keyword in keywords) + r')\b'),
    )
    for category, keywords in _CATEGORY_KEYWORDS.items()
]


def assign_category(bookmark):
    """Set ``bookmark['category']`` from keywords found in its summary.

    The summary is lower-cased and checked against each category's keywords
    as whole words/phrases (``\\b`` boundaries, so "video" does not match
    "videos"). The first category, in table order, with a matching keyword
    is assigned; if none match, the category is ``'Uncategorized'``.

    Args:
        bookmark: dict describing a bookmark; its optional ``'summary'``
            entry is read. A missing or ``None`` summary is treated as
            empty text.

    Returns:
        The same ``bookmark`` dict, mutated in place with a ``'category'``
        key.
    """
    # `or ''` also covers a summary explicitly stored as None, which
    # `.get('summary', '')` alone would pass straight to `.lower()`.
    summary = (bookmark.get('summary') or '').lower()

    bookmark['category'] = next(
        (category for category, pattern in _CATEGORY_PATTERNS if pattern.search(summary)),
        'Uncategorized',
    )
    return bookmark
|
| 155 |
+
|
| 156 |
def vectorize_and_index(bookmarks):
|
| 157 |
summaries = [bookmark['summary'] for bookmark in bookmarks]
|
| 158 |
embeddings = embedding_model.encode(summaries)
|
|
|
|
| 170 |
url = bookmark['url']
|
| 171 |
etag = bookmark.get('etag', 'N/A')
|
| 172 |
summary = bookmark.get('summary', '')
|
| 173 |
+
category = bookmark.get('category', 'Uncategorized')
|
| 174 |
|
| 175 |
# Apply inline styles for dead links
|
| 176 |
if bookmark.get('dead_link'):
|
|
|
|
| 184 |
<div class="card" style="{card_style}">
|
| 185 |
<div class="card-content">
|
| 186 |
<h3 style="{text_style}">{index}. {title}</h3>
|
| 187 |
+
<p style="{text_style}"><strong>Category:</strong> {category}</p>
|
| 188 |
<p style="{text_style}"><strong>URL:</strong> <a href="{url}" target="_blank" style="{text_style}">{url}</a></p>
|
| 189 |
<p style="{text_style}"><strong>Status:</strong> {status}</p>
|
| 190 |
<p style="{text_style}"><strong>ETag:</strong> {etag}</p>
|
|
|
|
| 212 |
# Asynchronously fetch bookmark info
|
| 213 |
asyncio.run(process_bookmarks_async(bookmarks))
|
| 214 |
|
| 215 |
+
# Generate summaries and assign categories
|
| 216 |
for bookmark in bookmarks:
|
| 217 |
generate_summary(bookmark)
|
| 218 |
+
assign_category(bookmark)
|
| 219 |
|
| 220 |
faiss_index, embeddings = vectorize_and_index(bookmarks)
|
| 221 |
message = f"Successfully processed {len(bookmarks)} bookmarks."
|
|
|
|
| 236 |
if idx < len(bookmarks):
|
| 237 |
bookmark = bookmarks[idx]
|
| 238 |
index = bookmarks.index(bookmark) + 1 # Start index at 1
|
| 239 |
+
response += f"{index}. Title: {bookmark['title']}\nURL: {bookmark['url']}\nCategory: {bookmark.get('category', 'Uncategorized')}\nSummary: {bookmark['summary']}\n\n"
|
| 240 |
return response.strip()
|
| 241 |
|
| 242 |
def edit_bookmark(bookmark_idx, new_title, new_url):
|
|
|
|
| 250 |
# Re-fetch bookmark info
|
| 251 |
asyncio.run(process_bookmarks_async([bookmarks[bookmark_idx]]))
|
| 252 |
generate_summary(bookmarks[bookmark_idx])
|
| 253 |
+
assign_category(bookmarks[bookmark_idx])
|
| 254 |
# Rebuild the FAISS index
|
| 255 |
faiss_index, embeddings = vectorize_and_index(bookmarks)
|
| 256 |
message = "Bookmark updated successfully."
|