Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| import nltk | |
| from nltk.stem.snowball import SnowballStemmer | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import streamlit as st | |
| from PIL import Image | |
# Download required NLTK data.
# The fetch is best-effort: a failure (for example, no network access) is
# reported but does not stop the app, so a copy of the data that is already
# installed can still be used.
# NOTE(review): newer NLTK releases may look up 'punkt_tab' rather than
# 'punkt' -- confirm against the installed NLTK version.
try:
    nltk.download('punkt')
except Exception as exc:
    print(f"NLTK 'punkt' download failed: {exc}")
# Load the dataset from the CSV file and discard its 'id' column in one step.
data = pd.read_csv('amazon_product.csv').drop(columns='id')
# tokenizer and stemmer
stemmer = SnowballStemmer('english')


def tokenize_and_stem(text):
    """Lower-case *text*, split it into word tokens and stem each token.

    Args:
        text: The string to process.

    Returns:
        A list with one stemmed string per token, in token order.
    """
    lowered = text.lower()
    return list(map(stemmer.stem, nltk.word_tokenize(lowered)))
# stemmed tokens column
# Each product's text is its title and description joined by a space.
# Missing values are replaced with empty strings first, so a product that
# lacks a title or description yields fewer tokens instead of raising while
# the strings are concatenated.
data['stemmed_tokens'] = (
    data['Title'].fillna('').astype(str)
    + ' '
    + data['Description'].fillna('').astype(str)
).apply(tokenize_and_stem)
# TF-IDF vectorizer and cosine similarity function
tfidf_vectorizer = TfidfVectorizer(tokenizer=tokenize_and_stem)


def cosine_sim(text1, text2):
    """Return the TF-IDF cosine similarity between two token sequences.

    Each sequence is joined into a single space-separated document. The
    shared vectorizer is re-fitted on just these two documents on every
    call, so the weights reflect only this pair.

    Args:
        text1: Iterable of string tokens for the first document.
        text2: Iterable of string tokens for the second document.

    Returns:
        The cosine similarity of the two documents' TF-IDF vectors, or
        ``0.0`` when either document contains no text.
    """
    text1_concatenated = ' '.join(text1)
    text2_concatenated = ' '.join(text2)

    # A blank document has nothing in common with anything. Returning early
    # also avoids fitting the vectorizer on two blank documents, which
    # fails because there is no vocabulary to learn.
    if not text1_concatenated.strip() or not text2_concatenated.strip():
        return 0.0

    # NOTE: the vectorizer tokenizes with tokenize_and_stem, so the joined
    # tokens are tokenized and stemmed again here.
    tfidf_matrix = tfidf_vectorizer.fit_transform([text1_concatenated, text2_concatenated])
    # Entry [0][1] of the pairwise matrix is document 1 versus document 2.
    return cosine_similarity(tfidf_matrix)[0][1]
# search function
def search_products(query):
    """Return the products most similar to a free-text query.

    Every row of the module-level ``data`` frame is scored against the
    query with ``cosine_sim``; the scores are stored in its 'similarity'
    column as a side effect.

    Args:
        query: Search text entered by the user.

    Returns:
        A DataFrame holding the 'Title', 'Description' and 'Category'
        columns of at most ten rows, ordered by descending similarity.
        The frame is empty when the query contains no tokens.
    """
    result_columns = ['Title', 'Description', 'Category']
    query_stemmed = tokenize_and_stem(query)

    if not query_stemmed:
        # A blank query scores 0 against every product, so the "top ten"
        # would just be arbitrary rows presented as matches. Keep the
        # column write for consistency and return no results instead.
        data['similarity'] = 0.0
        return data.head(0)[result_columns]

    data['similarity'] = data['stemmed_tokens'].apply(lambda x: cosine_sim(query_stemmed, x))
    results = data.sort_values(by=['similarity'], ascending=False).head(10)[result_columns]
    return results
# web app
# Page layout: banner image, title, query box and search button.
banner = Image.open('download.png')
st.image(banner, width=600)
st.title("Intelligent Product Finder for Amazon")
query = st.text_input("Enter Product Name")

# Run the search and show the matches only when the button reports a press.
search_clicked = st.button('Search')
if search_clicked:
    st.write(search_products(query))