Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,6 +11,8 @@ from langchain_groq import ChatGroq
|
|
| 11 |
from langchain.embeddings import OpenAIEmbeddings
|
| 12 |
from langchain.vectorstores import FAISS
|
| 13 |
from langchain.vectorstores import Chroma
|
|
|
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
from dotenv import load_dotenv
|
|
@@ -32,12 +34,29 @@ llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.9, max_tokens
|
|
| 32 |
|
| 33 |
if process_url_clicked:
|
| 34 |
# load data
|
| 35 |
-
loader = UnstructuredURLLoader(urls=urls)
|
| 36 |
-
main_placeholder.text("Data Loading...Started...✅✅✅")
|
| 37 |
-
data = loader.load()
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
# split data
|
| 42 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 43 |
separators=['\n\n', '\n', '.', ','],
|
|
|
|
| 11 |
from langchain.embeddings import OpenAIEmbeddings
|
| 12 |
from langchain.vectorstores import FAISS
|
| 13 |
from langchain.vectorstores import Chroma
|
| 14 |
+
import requests
|
| 15 |
+
from bs4 import BeautifulSoup
|
| 16 |
|
| 17 |
|
| 18 |
from dotenv import load_dotenv
|
|
|
|
| 34 |
|
| 35 |
if process_url_clicked:
|
| 36 |
# load data
|
| 37 |
+
#loader = UnstructuredURLLoader(urls=urls)
|
| 38 |
+
#main_placeholder.text("Data Loading...Started...✅✅✅")
|
| 39 |
+
#data = loader.load()
|
| 40 |
+
def fetch_web_content(url):
    """Download *url* and return the text content of the page.

    The page is requested with a 10-second timeout and parsed with
    BeautifulSoup's built-in ``html.parser``. ``<script>`` and ``<style>``
    elements are removed before text extraction so that JavaScript and CSS
    source do not end up in the returned text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The extracted page text on success. On any failure (connection
        error, timeout, non-2xx status, ...) a string of the form
        ``"Error fetching <url>: <reason>"`` is returned instead of raising.
    """
    try:
        response = requests.get(url, timeout=10)
        # Turn 4xx/5xx responses into an exception so they take the error path.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # get_text() would otherwise include inline JS/CSS source verbatim.
        for tag in soup(["script", "style"]):
            tag.decompose()
        return soup.get_text()
    except Exception as e:
        # Deliberately best-effort: one bad URL must not abort the whole load.
        return f"Error fetching {url}: {e}"
|
| 48 |
+
|
| 49 |
+
# Show a status message while the pages are being downloaded.
main_placeholder.text("Data Loading...Started...✅✅✅")

# Fetch the text of every non-blank entry in `urls`.
# NOTE(review): each element of `data` is a plain string (page text, or an
# "Error fetching ..." message when the request failed) -- confirm that the
# code consuming `data` accepts strings rather than Document objects.
data = [fetch_web_content(url) for url in urls if url.strip()]

# Report that loading has finished.
main_placeholder.text("Data Loading...Completed...✅✅✅")
|
| 60 |
# split data
|
| 61 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 62 |
separators=['\n\n', '\n', '.', ','],
|