Spaces:
Sleeping
Sleeping
Commit ·
78489d3
1
Parent(s): b5b2749
Upload 4 files
Browse files- chatbot.py +45 -0
- finetuned.py +11 -0
- scraper_script.py +53 -0
- trends.py +50 -0
chatbot.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Gradio chat UI that turns a user message into Flipkart product picks.

Flow: a fine-tuned model extracts a search query from the message, trending
brands are fetched from Google Trends, Flipkart is scraped with those
filters, and the top results are rendered as Markdown/HTML in the chat pane.
"""
import gradio as gr
from finetuned import get_search_query
from scraper_script import scrape_flipkart_products
from itertools import islice
from trends import get_trending_brands

with gr.Blocks() as demo:
    # NOTE(review): .style() is deprecated and removed in Gradio 4.x; on
    # newer versions use gr.Chatbot(height=750) — confirm pinned version.
    chatbot = gr.Chatbot().style(height=750)
    msg = gr.Textbox(label="Prompt")
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Handle one user turn: scrape products, append a bot reply.

        Returns ("", updated_history) so the textbox clears on submit.
        """
        # Keep only the first line: the fine-tuned completion can run on
        # past the query itself.
        search_query = get_search_query(message).split('\n', 1)[0].strip()
        print(search_query)

        # TODO(review): user profile is hard-coded; wire to real user data.
        user_info = {
            'name': 'John Doe',
            'gender': 'Male',
            'age': 34,
            'location': 'Hyderabad'
        }

        popular_brands = get_trending_brands()[:5]
        scraped_results = scrape_flipkart_products(search_query, user_info, popular_brands)

        if not scraped_results:
            bot_message = "I'm sorry, I couldn't find anything."
        else:
            bot_message = "## Understood! Here are my recommendations:\n\n"
            # Show at most the first three products.
            for entry in islice(scraped_results, 3):
                bot_message += " \n"
                if entry['Brand'] != 'N/A':
                    bot_message += f"**Brand:** {entry['Brand']} \n"
                bot_message += f"**Description:** {entry['Description']} \n"
                bot_message += f"**Price:** {entry['Price']} \n"
                bot_message += f"**Deliver By:** {entry['Delivery_Date']} \n"
                bot_message += f"**Link:** <a href='{entry['Link']}' target='_blank'>Link</a> \n"

        chat_history.append((message, bot_message))
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

demo.launch()
|
finetuned.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Wrapper around a fine-tuned OpenAI completion model that extracts a
Flipkart search query from a free-form user message."""
import os

import openai

# SECURITY: an API key was previously hard-coded on this line and committed
# to the repository — that key is compromised and must be revoked. Read the
# key from the environment instead of embedding it in source.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")


def get_search_query(user_message):
    """Return the fine-tuned model's completion text for *user_message*.

    The " \\n\\n###\\n\\n" prompt suffix and " \\n" stop token match the
    separator format the model was fine-tuned with.
    """
    response = openai.Completion.create(
        model="ada:ft-personal-2023-08-20-13-35-30",
        prompt=user_message + " \n\n###\n\n",
        stop=[" \n"],
    )
    return response["choices"][0]["text"]
|
scraper_script.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import requests
from bs4 import BeautifulSoup
import pandas as pd


def scrape_flipkart_products(search_query, filters, brands):
    """Scrape the first page of Flipkart search results.

    Parameters
    ----------
    search_query : str
        Free-text query; spaces are '+'-encoded into the URL.
    filters : dict
        User profile; only the 'gender' key ('Male'/'Female',
        case-insensitive) is used, adding an "ideal for" facet.
    brands : list[str]
        Brand names added as brand facets to the search URL.

    Returns
    -------
    list[dict]
        One dict per product card with keys Brand, Link, Image,
        Description, Price, Delivery_Date; missing fields are 'N/A'.
    """
    gender = filters.get('gender', '').lower()
    url = "https://www.flipkart.com/search?q="
    if search_query:
        url += f"{search_query.replace(' ', '+')}"
    if brands:
        for brand in brands:
            # Flipkart encodes spaces inside a brand facet value as %2B.
            encoded = "%2B".join(brand.split(" "))
            url += f"&p%5B%5D=facets.brand%255B%255D%3D{encoded}"
    # BUG FIX: gender was lower-cased above but compared against
    # 'Male'/'Female', so the gender facet was never applied.
    if gender == 'male':
        url += "&p[]=facets.ideal_for%255B%255D%3DMen"
    elif gender == 'female':
        url += "&p[]=facets.ideal_for%255B%255D%3DWomen"
    print(url)

    # Timeout so a stalled connection can't hang the chatbot forever.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.content, 'html.parser')

    products = []

    # NOTE(review): the CSS class names below are Flipkart's obfuscated
    # build classes — they break whenever Flipkart redeploys its frontend.
    for product in soup.find_all('div', class_='_1xHGtK'):
        brand_element = product.find('div', class_='_2WkVRV')
        brand = brand_element.text if brand_element else "N/A"

        link_element = product.find('a', class_='_2UzuFa')
        link = "https://www.flipkart.com" + link_element['href'] if link_element else "N/A"

        image_element = product.find('img')
        image = image_element['src'] if image_element else "N/A"

        description_element = product.find('a', class_='IRpwTa')
        description = description_element.text if description_element else "N/A"

        price_element = product.find('div', class_='_30jeq3')
        price = price_element.text.replace('₹', '').replace(',', '') if price_element else "N/A"

        delivery_date_element = product.find('span', class_="_1TPvTK")
        delivery_date = delivery_date_element.text if delivery_date_element else "N/A"

        products.append({
            'Brand': brand,
            'Link': link,
            'Image': image,
            'Description': description,
            'Price': price,
            'Delivery_Date': delivery_date
        })

    return products
|
trends.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Rank a fixed list of Indian apparel brands by Google Trends popularity."""
from pytrends.request import TrendReq
import pandas as pd

# Candidate brands to rank. Google Trends compares at most 5 terms per
# request, so these are queried in chunks of 5 below.
all_keywords = [
    "Allen Solly",
    "Van Heusen",
    "Peter England",
    "Adidas",
    "Louis Philippe",
    "Biba",
    "Jockey",
    "Levi's",
    "Nike",
    "Puma",
    "Reebok",
    "United Colors of Benetton",
    "HRX",
    "Forever 21",
    "Arrow",
    "Raymond",
    "Park Avenue",
    "Wrangler",
    "Pepe Jeans",
    "Jack & Jones",
    "Indian Terrain",
    "Spykar",
    "Wildcraft",
    "Calvin Klein Jeans",
    "Tommy Hilfiger",
]


def get_trending_brands():
    """Return all_keywords reordered by 7-day Google Trends interest in India.

    Most popular first. Brands are queried in chunks of 5 (the Trends
    comparison limit) and per-chunk mean interest scores are averaged into
    one score per brand. Falls back to the static list order if no data
    could be fetched at all.
    """
    pytrends = TrendReq(hl='en-US', tz=360)

    chunk_size = 5  # Google Trends allows at most 5 keywords per payload
    keyword_chunks = [all_keywords[i:i+chunk_size] for i in range(0, len(all_keywords), chunk_size)]
    popularity_scores = []

    for chunk in keyword_chunks:
        pytrends.build_payload(chunk, timeframe='now 7-d', geo='IN')
        data = pytrends.interest_over_time()
        if data.empty:
            # Rate-limited or no interest data for this chunk — skip it
            # instead of averaging an empty frame.
            continue
        # BUG FIX: interest_over_time() appends a boolean 'isPartial'
        # column; without dropping it, 'isPartial' gets a mean score and
        # can show up in the returned "brand" ranking.
        data = data.drop(columns=['isPartial'], errors='ignore')
        chunk_popularity = data.mean().sort_values(ascending=False)
        popularity_scores.append(chunk_popularity)

    if not popularity_scores:
        # Nothing fetched — return the static order rather than crashing
        # on pd.concat of an empty list.
        return list(all_keywords)

    # Align per-chunk Series on brand name; brands absent from a chunk are
    # NaN and are ignored by mean(axis=1).
    combined_scores = pd.concat(popularity_scores, axis=1)
    overall_scores = combined_scores.mean(axis=1).sort_values(ascending=False)
    return overall_scores.index.tolist()
|