# app.py — Goodreads scraper + Groq Q&A chat (Gradio app)
# Origin: ShanzaGull's Hugging Face Space, commit bdb01f5
import requests
from bs4 import BeautifulSoup
import os
from dotenv import load_dotenv
from groq import Groq
import gradio as gr
# -------------------------------
# LOAD API KEYS
# -------------------------------
# Read secrets from a local .env file (falls back to the host environment).
load_dotenv()
BRIGHTDATA_API_KEY = os.getenv("BRIGHTDATA_API_KEY")  # Bright Data Web Unlocker token
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # Groq chat-completions token
# -------------------------------
# GROQ CLIENT
# -------------------------------
# Single module-level client, reused by every ask_ai() call.
client = Groq(api_key=GROQ_API_KEY)
# -------------------------------
# SCRAPER FUNCTION (DYNAMIC URL)
# -------------------------------
def scrape_books(url):
    """Scrape up to 10 books from a Goodreads list page via Bright Data.

    Args:
        url: Goodreads list URL to fetch through the Bright Data
            Web Unlocker proxy endpoint.

    Returns:
        A ``(results, status)`` tuple: ``results`` is a list of dicts
        with ``title``/``author``/``rating`` keys (empty on failure),
        ``status`` is a human-readable message for the UI.
    """
    # Guard clause: an empty/None URL never reaches the network.
    if not url:
        return [], "❌ Please enter a URL"

    headers = {
        "Authorization": f"Bearer {BRIGHTDATA_API_KEY}",
        "Content-Type": "application/json",
    }
    data = {
        "zone": "web_unlocker1",  # Bright Data zone name — must match the account config
        "url": url,
        "format": "raw",  # raw HTML back, not JSON-wrapped
    }
    try:
        response = requests.post(
            "https://api.brightdata.com/request",
            json=data,
            headers=headers,
            timeout=30,
        )
        if response.status_code != 200:
            return [], f"❌ Failed! Status: {response.status_code}"

        soup = BeautifulSoup(response.text, "html.parser")
        # Goodreads list rows carry a schema.org Book microdata type.
        books = soup.select("tr[itemtype='http://schema.org/Book']")

        results = []
        for book in books[:10]:  # cap at 10 rows to keep the AI context small
            try:
                title = book.select_one("a.bookTitle span").text.strip()
                author = book.select_one("a.authorName span").text.strip()
                rating = book.select_one("span.minirating").text.strip()
            except AttributeError:
                # select_one() returned None (markup changed) — skip this
                # row instead of aborting the whole scrape. Narrowed from
                # the original bare `except`, which also hid real bugs.
                continue
            results.append({
                "title": title,
                "author": author,
                "rating": rating,
            })

        if not results:
            return [], "⚠️ No books found (page structure changed?)"
        # NOTE: success emoji was mojibake ("βœ…") in the original source.
        return results, f"✅ Found {len(results)} books"
    except Exception as e:
        # Network/parse failures surface as a status string rather than
        # crashing the Gradio callback.
        return [], f"❌ Error: {str(e)}"
# -------------------------------
# GLOBAL STORAGE
# -------------------------------
# Module-level cache: filled by load_data(), read by ask_ai().
books_data = []
# -------------------------------
# LOAD DATA BUTTON FUNCTION
# -------------------------------
def load_data(url):
    """Gradio callback: scrape *url* and cache results in module state.

    Stores the scraped list in the module-level ``books_data`` so that
    ``ask_ai`` can use it as chat context, and returns the status
    string for display in the Status textbox.
    """
    global books_data
    books_data, status = scrape_books(url)
    return status
# -------------------------------
# AI FUNCTION
# -------------------------------
def ask_ai(message, history):
    """Gradio ChatInterface callback: answer *message* from scraped data.

    Args:
        message: The user's question.
        history: Prior chat turns (unused; required by ChatInterface's
            callback signature).

    Returns:
        The model's reply, or a warning/error string on failure.
    """
    # Refuse to answer until load_data() has populated the cache.
    if not books_data:
        return "⚠️ Please load data first using URL"

    # The whole scraped list is inlined into the system prompt; that is
    # fine for the <=10 small records scrape_books() produces.
    context = str(books_data)
    try:
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {
                    "role": "system",
                    "content": f"Answer ONLY using this data: {context}",
                },
                {"role": "user", "content": message},
            ],
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"❌ AI Error: {str(e)}"
# -------------------------------
# UI (GRADIO)
# -------------------------------
with gr.Blocks() as demo:
    # Usage hint with a known-good example list URL.
    # NOTE: the link/pointer emoji were mojibake ("πŸ”—", "πŸ‘‰") in the
    # original source; restored here.
    gr.Markdown("""
### 🔗 Example URL (Copy & Paste):
👉 https://www.goodreads.com/list/show/1.Best_Books_Ever
""")

    url_input = gr.Textbox(
        label="Enter Goodreads URL",
        placeholder="Paste URL here...",
    )
    load_btn = gr.Button("Load Data")
    status_output = gr.Textbox(label="Status")

    # Scrape on click; the status message lands in the Status textbox.
    load_btn.click(fn=load_data, inputs=url_input, outputs=status_output)

    # Chat panel backed by the scraped books cached in books_data.
    gr.ChatInterface(
        fn=ask_ai,
        title="Ask Questions",
        description="Ask about the scraped books",
    )

demo.launch()