Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
- app.py +14 -24
- flagged/log.csv +2 -0
app.py
CHANGED
|
@@ -10,6 +10,7 @@ import re, sys
|
|
| 10 |
from tensorflow.keras.models import load_model
|
| 11 |
import joblib
|
| 12 |
import gradio as gr
|
|
|
|
| 13 |
|
| 14 |
headers = {
|
| 15 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36, Opera/9.80 (Windows NT 6.1; WOW64) Presto/2.12.388 Version/12.18'
|
|
@@ -76,6 +77,8 @@ def getReviews(soup, site, url):
|
|
| 76 |
text = []
|
| 77 |
for t in Review_text_sec:
|
| 78 |
text.append(t.text)
|
|
|
|
|
|
|
| 79 |
|
| 80 |
Rating = soup.find_all("div", {"class": ["_3LWZlK _1BLPMq", "_3LWZlK _32lA32 _1BLPMq", "_3LWZlK _1rdVr6 _1BLPMq"]})
|
| 81 |
rate = []
|
|
@@ -114,6 +117,7 @@ def getReviews(soup, site, url):
|
|
| 114 |
text = []
|
| 115 |
for t in Review_text_sec:
|
| 116 |
text.append(t.text.replace('\n', ''))
|
|
|
|
| 117 |
|
| 118 |
Rating = soup.find_all(attrs={"data-hook": "review-star-rating"})
|
| 119 |
rate = []
|
|
@@ -142,7 +146,6 @@ def getReviews(soup, site, url):
|
|
| 142 |
collate_df = pd.DataFrame.from_dict(collate)
|
| 143 |
return collate_df
|
| 144 |
|
| 145 |
-
|
| 146 |
def preprocess_text(text):
|
| 147 |
stemmer = snowballstemmer.EnglishStemmer()
|
| 148 |
text = " ".join(stemmer.stemWords(re.sub('[!"#%\'()*+,-./:;<=>?@[\\]^_`{|}~1234567890’”“′‘\\\\]', ' ', text).split(' ')))
|
|
@@ -166,10 +169,10 @@ def scraper(url):
|
|
| 166 |
df2 = []
|
| 167 |
soup = getsoup(url)
|
| 168 |
site = url.split('.')[1]
|
| 169 |
-
if site == 'flipkart':
|
| 170 |
-
|
| 171 |
-
elif site == 'amazon':
|
| 172 |
-
|
| 173 |
product = url.split('/')[3]
|
| 174 |
lastPage = 1
|
| 175 |
urllistPages = geturllist(url, lastPage)
|
|
@@ -214,26 +217,13 @@ def scraper(url):
|
|
| 214 |
arr = []
|
| 215 |
for i, j in enumerate(argMax):
|
| 216 |
if j == 2 or j == 1:
|
| 217 |
-
arr.append(
|
| 218 |
-
return
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
# @app.route('/', methods=['GET'])
|
| 222 |
-
# def index():
|
| 223 |
-
# results = []
|
| 224 |
-
# if request.args.get('url'):
|
| 225 |
-
# results = scraper(request.args.get('url'))
|
| 226 |
-
# return results
|
| 227 |
-
|
| 228 |
-
# if __name__ == "__main__":
|
| 229 |
-
# app.run(debug=True)
|
| 230 |
|
| 231 |
def index(img_url):
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
return scraper(img_url)
|
| 236 |
-
|
| 237 |
|
| 238 |
inputs_image_url = [
|
| 239 |
gr.Textbox(type="text", label="Image URL"),
|
|
@@ -254,4 +244,4 @@ interface_image_url = gr.Interface(
|
|
| 254 |
gr.TabbedInterface(
|
| 255 |
[interface_image_url],
|
| 256 |
tab_names=['Reviews inference']
|
| 257 |
-
).queue().launch()
|
|
|
|
| 10 |
from tensorflow.keras.models import load_model
|
| 11 |
import joblib
|
| 12 |
import gradio as gr
|
| 13 |
+
import json
|
| 14 |
|
| 15 |
headers = {
|
| 16 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36, Opera/9.80 (Windows NT 6.1; WOW64) Presto/2.12.388 Version/12.18'
|
|
|
|
| 77 |
text = []
|
| 78 |
for t in Review_text_sec:
|
| 79 |
text.append(t.text)
|
| 80 |
+
|
| 81 |
+
print(Review_text_sec)
|
| 82 |
|
| 83 |
Rating = soup.find_all("div", {"class": ["_3LWZlK _1BLPMq", "_3LWZlK _32lA32 _1BLPMq", "_3LWZlK _1rdVr6 _1BLPMq"]})
|
| 84 |
rate = []
|
|
|
|
| 117 |
text = []
|
| 118 |
for t in Review_text_sec:
|
| 119 |
text.append(t.text.replace('\n', ''))
|
| 120 |
+
print(Review_text_sec)
|
| 121 |
|
| 122 |
Rating = soup.find_all(attrs={"data-hook": "review-star-rating"})
|
| 123 |
rate = []
|
|
|
|
| 146 |
collate_df = pd.DataFrame.from_dict(collate)
|
| 147 |
return collate_df
|
| 148 |
|
|
|
|
| 149 |
def preprocess_text(text):
|
| 150 |
stemmer = snowballstemmer.EnglishStemmer()
|
| 151 |
text = " ".join(stemmer.stemWords(re.sub('[!"#%\'()*+,-./:;<=>?@[\\]^_`{|}~1234567890’”“′‘\\\\]', ' ', text).split(' ')))
|
|
|
|
| 169 |
df2 = []
|
| 170 |
soup = getsoup(url)
|
| 171 |
site = url.split('.')[1]
|
| 172 |
+
# if site == 'flipkart':
|
| 173 |
+
# url = url + '&page=1'
|
| 174 |
+
# elif site == 'amazon':
|
| 175 |
+
# url = url + '&pageNumber=1'
|
| 176 |
product = url.split('/')[3]
|
| 177 |
lastPage = 1
|
| 178 |
urllistPages = geturllist(url, lastPage)
|
|
|
|
| 217 |
arr = []
|
| 218 |
for i, j in enumerate(argMax):
|
| 219 |
if j == 2 or j == 1:
|
| 220 |
+
arr.append(i)
|
| 221 |
+
return {'class': 'review-text-content', 'indices': arr}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
|
| 223 |
def index(img_url):
|
| 224 |
+
results = scraper(img_url)
|
| 225 |
+
print(results)
|
| 226 |
+
return json.dumps(results)
|
|
|
|
|
|
|
| 227 |
|
| 228 |
inputs_image_url = [
|
| 229 |
gr.Textbox(type="text", label="Image URL"),
|
|
|
|
| 244 |
gr.TabbedInterface(
|
| 245 |
[interface_image_url],
|
| 246 |
tab_names=['Reviews inference']
|
| 247 |
+
).queue().launch()
|
flagged/log.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Image URL,Result Dictionary,flag,username,timestamp
|
| 2 |
+
,,,,2024-01-30 14:40:30.105261
|