NTDuy committed on
Commit
6ce99db
·
verified ·
1 Parent(s): cfb4c58

fix crawler to Tiki, deprecating Shopee crawl

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
3
  import streamlit as st
4
  import re
5
  from data_crawler.Shopee_crawl import ShopeeCrawler
 
6
  import plotly.express as px
7
  import plotly.graph_objects as go
8
  from utils.data_preprocessing import cleaning, cleaning_for_phobert
@@ -12,10 +13,10 @@ from graphs import *
12
  st.set_page_config(layout="wide")
13
 
14
  MODEL_PATH = "./Phobert-base-v2-shopee"
15
- tokenizer_option = st.sidebar.selectbox("Select tokenizer", ["underthesea (fast)", "VnCoreNLP (recommended)"])
16
- if tokenizer_option == "VnCoreNLP (recommended)":
17
  TOKENIZE_PATH = "./vncorenlp/VnCoreNLP-1.1.1.jar"
18
- elif tokenizer_option == "underthesea (fast)":
19
  TOKENIZE_PATH = "underthesea"
20
 
21
  buffer = io.BytesIO()
@@ -62,13 +63,13 @@ if df is None:
62
  df = get_data_from_file(uploaded_file)
63
  else:
64
  crawler = ShopeeCrawler()
65
- link = st.sidebar.text_input("$\\textsf{\Large Enter Product Link}$")
66
 
67
  if link:
 
68
  @st.cache_data
69
  def get_data_from_link():
70
- shop_id, item_id = crawler.get_ids_from_link(link)
71
- data = crawler.Crawl(item_id, shop_id)
72
  df = pd.DataFrame(data)
73
  return df
74
  data = get_data_from_link()
 
3
  import streamlit as st
4
  import re
5
  from data_crawler.Shopee_crawl import ShopeeCrawler
6
+ from data_crawler.Tiki_Crawl import *
7
  import plotly.express as px
8
  import plotly.graph_objects as go
9
  from utils.data_preprocessing import cleaning, cleaning_for_phobert
 
13
  st.set_page_config(layout="wide")
14
 
15
  MODEL_PATH = "./Phobert-base-v2-shopee"
16
+ tokenizer_option = st.sidebar.selectbox("Select tokenizer", ["underthesea", "VnCoreNLP"])
17
+ if tokenizer_option == "VnCoreNLP":
18
  TOKENIZE_PATH = "./vncorenlp/VnCoreNLP-1.1.1.jar"
19
+ elif tokenizer_option == "underthesea":
20
  TOKENIZE_PATH = "underthesea"
21
 
22
  buffer = io.BytesIO()
 
63
  df = get_data_from_file(uploaded_file)
64
  else:
65
  crawler = ShopeeCrawler()
66
+ link = st.sidebar.text_input("$\\textsf{\Large Enter Product Link (Tiki only)}$")
67
 
68
  if link:
69
+ n_pages = st.sidebar.slider(label = "Number of pages to crawl", min_value=1, max_value=5)
70
  @st.cache_data
71
  def get_data_from_link():
72
+ data = crawl_tiki(link, pages = int(n_pages))
 
73
  df = pd.DataFrame(data)
74
  return df
75
  data = get_data_from_link()