# Goodreads list scraper + Groq-powered Q&A chat (Gradio app).
# NOTE(review): the original capture began with "Spaces: / Sleeping / Sleeping" —
# Hugging Face Spaces page residue, not part of the program.
import os

import gradio as gr
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from groq import Groq
# -------------------------------
# LOAD API KEYS
# -------------------------------
# Read secrets from a local .env file (or the process environment).
# Either key may come back as None if it is not configured.
load_dotenv()
BRIGHTDATA_API_KEY = os.getenv("BRIGHTDATA_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# -------------------------------
# GROQ CLIENT
# -------------------------------
# Shared client used by ask_ai() below.
client = Groq(api_key=GROQ_API_KEY)
# -------------------------------
# SCRAPER FUNCTION (DYNAMIC URL)
# -------------------------------
def scrape_books(url):
    """Scrape up to 10 books (title/author/rating) from a Goodreads list page.

    The page is fetched through the Bright Data Web Unlocker request API and
    parsed with BeautifulSoup.

    Args:
        url: Goodreads list URL to scrape.

    Returns:
        A ``(results, status)`` tuple: ``results`` is a list of dicts with
        keys ``"title"``, ``"author"``, ``"rating"`` (possibly empty), and
        ``status`` is a human-readable message for the UI.
    """
    if not url or not url.strip():
        return [], "❌ Please enter a URL"

    headers = {
        "Authorization": f"Bearer {BRIGHTDATA_API_KEY}",
        "Content-Type": "application/json",
    }
    data = {
        "zone": "web_unlocker1",   # Bright Data zone name
        "url": url.strip(),
        "format": "raw",           # ask for raw HTML, not a JSON wrapper
    }
    try:
        response = requests.post(
            "https://api.brightdata.com/request",
            json=data,
            headers=headers,
            timeout=30,
        )
        if response.status_code != 200:
            return [], f"❌ Failed! Status: {response.status_code}"

        soup = BeautifulSoup(response.text, "html.parser")
        # Each book row on a Goodreads list page is a <tr> with this itemtype.
        books = soup.select("tr[itemtype='http://schema.org/Book']")

        results = []
        for book in books[:10]:  # cap at 10 to keep the LLM context small
            try:
                title = book.select_one("a.bookTitle span").text.strip()
                author = book.select_one("a.authorName span").text.strip()
                rating = book.select_one("span.minirating").text.strip()
            except AttributeError:
                # select_one() returned None — the row is missing a field.
                # Skip just that row instead of silently swallowing every
                # possible error (the original used a bare `except: pass`).
                continue
            results.append({"title": title, "author": author, "rating": rating})

        if not results:
            return [], "⚠️ No books found (page structure changed?)"
        return results, f"✅ Found {len(results)} books"
    except Exception as e:
        # Top-level boundary: report network/parse failures to the UI.
        return [], f"❌ Error: {e}"
# -------------------------------
# GLOBAL STORAGE
# -------------------------------
# Most recently scraped books; written by load_data(), read by ask_ai().
books_data = []
# -------------------------------
# LOAD DATA BUTTON FUNCTION
# -------------------------------
def load_data(url):
    """Scrape *url* into the module-level cache and return a status message."""
    global books_data
    scraped, message = scrape_books(url)
    books_data = scraped
    return message
# -------------------------------
# AI FUNCTION
# -------------------------------
def ask_ai(message, history):
    """Answer *message* using only the scraped book data.

    Args:
        message: The user's question.
        history: Chat history supplied by gr.ChatInterface (unused here).

    Returns:
        The model's reply, or an error/status string for the chat UI.
    """
    if not books_data:
        return "⚠️ Please load data first using URL"

    # Stringify the scraped rows so the model is restricted to this data.
    context = str(books_data)
    try:
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {
                    "role": "system",
                    "content": f"Answer ONLY using this data: {context}",
                },
                {"role": "user", "content": message},
            ],
        )
        return response.choices[0].message.content
    except Exception as e:
        # Surface API failures in the chat instead of crashing the UI.
        return f"❌ AI Error: {e}"
# -------------------------------
# UI (GRADIO)
# -------------------------------
with gr.Blocks() as demo:
    gr.Markdown("""
### 📚 Example URL (Copy & Paste):
👉 https://www.goodreads.com/list/show/1.Best_Books_Ever
""")
    url_input = gr.Textbox(
        label="Enter Goodreads URL",
        placeholder="Paste URL here...",
    )
    load_btn = gr.Button("Load Data")
    status_output = gr.Textbox(label="Status")
    # Button click scrapes the URL and shows the status message.
    load_btn.click(fn=load_data, inputs=url_input, outputs=status_output)

    # Chat panel for asking questions about the scraped books.
    gr.ChatInterface(
        fn=ask_ai,
        title="Ask Questions",
        description="Ask about the scraped books",
    )

# Guard the launch so importing this module (e.g. in tests) does not
# start a server; running the script directly behaves as before.
if __name__ == "__main__":
    demo.launch()