Wosqa committed on
Commit
07b7a09
·
verified ·
1 Parent(s): 6f9b532

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +140 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import pandas as pd
4
+ from bs4 import BeautifulSoup
5
+ import gradio as gr
6
+ from dotenv import load_dotenv
7
+ import os
8
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# API credentials; either value is None when the variable is not set.
BRIGHTDATA_API_KEY = os.getenv("BRIGHTDATA_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Lowercase aliases kept so existing references to these names keep working.
bright_key = BRIGHTDATA_API_KEY
groq_key = GROQ_API_KEY

# Report only whether the key is present: slicing the key crashed with
# TypeError when it was unset, and printing any part of it leaks a secret
# into the logs.
print(f"BRIGHTDATA_API_KEY set: {bool(bright_key)}")
18
+
19
import requests

# Page to scrape and the Bright Data endpoint that fetches it on our behalf.
target_url = "https://www.goodreads.com/list/show/1.Best_Books_Ever"

brightdata_url = "https://api.brightdata.com/request"

headers = {
    "Authorization": f"Bearer {BRIGHTDATA_API_KEY}",
    "Content-Type": "application/json",
}

payload = {
    "url": target_url,
    "zone": "web_unlocker1",
    "format": "raw",
}

# Fetch the page. On any failure html_content stays empty, so the rest of
# the script still runs (with no books) instead of hanging or parsing an
# error body as if it were the Goodreads page.
response = None
html_content = ""
if not BRIGHTDATA_API_KEY:
    print("BRIGHTDATA_API_KEY is not set; skipping the Goodreads fetch.")
else:
    try:
        # Without a timeout, requests can block forever on a stalled connection.
        response = requests.post(
            brightdata_url, json=payload, headers=headers, timeout=120
        )
        response.raise_for_status()
        html_content = response.text
    except requests.RequestException as exc:
        print(f"Failed to fetch {target_url} via Bright Data: {exc}")

print(html_content[:1000])
39
+
40
from bs4 import BeautifulSoup
import pandas as pd

# Parse the fetched HTML and collect one dict per book row.
soup = BeautifulSoup(html_content, "html.parser")
rows = soup.select("tr[itemtype='http://schema.org/Book']")

books = []
for row in rows:
    title = row.select_one("a.bookTitle span")
    author = row.select_one("a.authorName span")
    rating = row.select_one("span.minirating")

    # Skip rows where any of the three elements was not found.
    if title is None or author is None or rating is None:
        continue

    books.append(
        {
            "title": title.text.strip(),
            "author": author.text.strip(),
            "rating": rating.text.strip(),
        }
    )

print(books[:3])

# Tabular view of the same records.
df = pd.DataFrame(books)
print(df.head())
62
+
63
# One numbered line per scraped book: "<rank>. <title> by <author> - <rating>".
books_context = "\n".join(
    f"{rank}. {entry['title']} by {entry['author']} - {entry['rating']}"
    for rank, entry in enumerate(books, start=1)
)

# System prompt that embeds the scraped list and restricts answers to it.
system_prompt = (
    "\n"
    "You are a book information assistant.\n"
    "Here is the scraped Goodreads book ranking data:\n"
    "\n"
    f"{books_context}\n"
    "\n"
    "Answer user questions ONLY using this data.\n"
    "If information is not present, say you don't know.\n"
)
77
+
78
# --- Groq API safe call function ---
def ask_groq(messages, *, model="llama-3.1-8b-instant", timeout=30):
    """Send chat messages to the Groq chat-completions endpoint.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts, sent as
            the ``messages`` field of the request body.
        model: Model identifier placed in the request body.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The reply text on success. On a missing API key, a request failure,
        a non-200 status, an unparsable body, or an unexpected response
        shape, a human-readable error string is returned instead.
    """
    if not GROQ_API_KEY:
        # Fail with a clear message rather than sending "Bearer None".
        return "Error: GROQ_API_KEY is not set."

    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "messages": messages,
    }

    try:
        # A timeout keeps a stalled connection from freezing the UI callback.
        response = requests.post(
            url, headers=headers, json=payload, timeout=timeout
        )
    except requests.RequestException as e:
        return f"Error connecting to Groq API: {e}"

    if response.status_code != 200:
        return f"API error {response.status_code}: {response.text}"

    try:
        data = response.json()
    except ValueError as e:
        return f"Error parsing JSON: {e}"

    # Guard the whole nested lookup: a 200 response can still lack
    # "choices", "message" or "content", or not be a dict at all.
    try:
        content = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        return f"Unexpected API response: {data}"

    if not isinstance(content, str):
        return f"Unexpected API response: {data}"
    return content
107
+
108
# --- Gradio chatbot function ---
# NOTE(review): `history` is not read by any code visible in this file;
# kept in case something else refers to it.
history = []


def chatbot(user_input, chat_history):
    """Handle one chat turn: query Groq and extend the conversation.

    Args:
        user_input: Text typed by the user (converted with ``str``).
        chat_history: List of ``{"role": ..., "content": ...}`` dicts for
            the conversation so far, or ``None`` for a new conversation.

    Returns:
        A ``(chat_history, "")`` tuple: the updated history and an empty
        string used to clear the input textbox.
    """
    # Work on a copy so the caller's list is never mutated in place.
    chat_history = list(chat_history) if chat_history else []

    # Ignore blank submissions instead of spending an API call on them.
    if user_input is None or not str(user_input).strip():
        return chat_history, ""

    user_text = str(user_input)

    # Messages to send to the Groq LLM: system prompt, prior turns, new input.
    messages_for_groq = [{"role": "system", "content": system_prompt}]
    for msg in chat_history:
        # Any role other than "user" is forwarded as "assistant".
        role = "user" if msg["role"] == "user" else "assistant"
        messages_for_groq.append({"role": role, "content": msg["content"]})
    messages_for_groq.append({"role": "user", "content": user_text})

    # Call Groq API (returns either the reply or an error string).
    response = ask_groq(messages_for_groq)

    # Record the new exchange in the history.
    chat_history.append({"role": "user", "content": user_text})
    chat_history.append({"role": "assistant", "content": str(response)})

    return chat_history, ""
132
+
133
# --- Gradio UI ---
with gr.Blocks() as demo:
    # The callback returns a list of {"role", "content"} dicts, so request
    # the "messages" format explicitly. Fall back to the bare constructor
    # on Gradio versions whose Chatbot does not accept a `type` argument.
    try:
        chatbot_output = gr.Chatbot(type="messages")
    except TypeError:
        chatbot_output = gr.Chatbot()
    user_input = gr.Textbox(placeholder="Ask about Goodreads books...")
    send_btn = gr.Button("Send")

    # Clicking "Send" and pressing Enter in the textbox trigger the same handler.
    send_btn.click(
        chatbot,
        inputs=[user_input, chatbot_output],
        outputs=[chatbot_output, user_input],
    )
    user_input.submit(
        chatbot,
        inputs=[user_input, chatbot_output],
        outputs=[chatbot_output, user_input],
    )

if __name__ == "__main__":
    demo.launch(share=False)  # or True if you want a public link
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ requests
3
+ beautifulsoup4
4
+ pandas
5
+ python-dotenv