fix crawler to tiki, deprecating Shopee Crawl
Browse files
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import pandas as pd
|
|
| 3 |
import streamlit as st
|
| 4 |
import re
|
| 5 |
from data_crawler.Shopee_crawl import ShopeeCrawler
|
|
|
|
| 6 |
import plotly.express as px
|
| 7 |
import plotly.graph_objects as go
|
| 8 |
from utils.data_preprocessing import cleaning, cleaning_for_phobert
|
|
@@ -12,10 +13,10 @@ from graphs import *
|
|
| 12 |
st.set_page_config(layout="wide")
|
| 13 |
|
| 14 |
MODEL_PATH = "./Phobert-base-v2-shopee"
|
| 15 |
-
tokenizer_option = st.sidebar.selectbox("Select tokenizer", ["underthesea", "VnCoreNLP"])
|
| 16 |
-
if tokenizer_option == "VnCoreNLP":
|
| 17 |
TOKENIZE_PATH = "./vncorenlp/VnCoreNLP-1.1.1.jar"
|
| 18 |
-
elif tokenizer_option == "underthesea":
|
| 19 |
TOKENIZE_PATH = "underthesea"
|
| 20 |
|
| 21 |
buffer = io.BytesIO()
|
|
@@ -62,13 +63,13 @@ if df is None:
|
|
| 62 |
df = get_data_from_file(uploaded_file)
|
| 63 |
else:
|
| 64 |
crawler = ShopeeCrawler()
|
| 65 |
-
link = st.sidebar.text_input("$\\textsf{\Large Enter Product Link}$")
|
| 66 |
|
| 67 |
if link:
|
|
|
|
| 68 |
@st.cache_data
|
| 69 |
def get_data_from_link():
|
| 70 |
-
|
| 71 |
-
data = crawler.Crawl(item_id, shop_id)
|
| 72 |
df = pd.DataFrame(data)
|
| 73 |
return df
|
| 74 |
data = get_data_from_link()
|
|
|
|
| 3 |
import streamlit as st
|
| 4 |
import re
|
| 5 |
from data_crawler.Shopee_crawl import ShopeeCrawler
|
| 6 |
+
from data_crawler.Tiki_Crawl import *
|
| 7 |
import plotly.express as px
|
| 8 |
import plotly.graph_objects as go
|
| 9 |
from utils.data_preprocessing import cleaning, cleaning_for_phobert
|
|
|
|
| 13 |
st.set_page_config(layout="wide")
|
| 14 |
|
| 15 |
MODEL_PATH = "./Phobert-base-v2-shopee"
|
| 16 |
+
tokenizer_option = st.sidebar.selectbox("Select tokenizer", ["underthesea", "VnCoreNLP"])
|
| 17 |
+
if tokenizer_option == "VnCoreNLP":
|
| 18 |
TOKENIZE_PATH = "./vncorenlp/VnCoreNLP-1.1.1.jar"
|
| 19 |
+
elif tokenizer_option == "underthesea":
|
| 20 |
TOKENIZE_PATH = "underthesea"
|
| 21 |
|
| 22 |
buffer = io.BytesIO()
|
|
|
|
| 63 |
df = get_data_from_file(uploaded_file)
|
| 64 |
else:
|
| 65 |
crawler = ShopeeCrawler()
|
| 66 |
+
link = st.sidebar.text_input("$\\textsf{\Large Enter Product Link (Tiki only)}$")
|
| 67 |
|
| 68 |
if link:
|
| 69 |
+
n_pages = st.sidebar.slider(label = "Number of pages to crawl", min_value=1, max_value=5)
|
| 70 |
@st.cache_data
|
| 71 |
def get_data_from_link():
|
| 72 |
+
data = crawl_tiki(link, pages = int(n_pages))
|
|
|
|
| 73 |
df = pd.DataFrame(data)
|
| 74 |
return df
|
| 75 |
data = get_data_from_link()
|