NealCaren committed on
Commit
52a7c4d
·
1 Parent(s): 3f62198
Files changed (1) hide show
  1. app.py +19 -5
app.py CHANGED
@@ -13,6 +13,9 @@ import io
13
 
14
  from nltk.tokenize import sent_tokenize
15
 
 
 
 
16
  import nltk
17
  nltk.download('punkt')
18
 
@@ -48,9 +51,14 @@ def sent_cross_load():
48
 
49
  @st.cache
50
  def load_data():
51
- #dfs = [pd.read_json(f'passages_{i}.jsonl', lines=True) for i in range(0,5)]
52
- #df = pd.concat(dfs)
53
- df = pd.read_json('https://www.dropbox.com/s/82lwbaym3b1o6uq/passages.jsonl?raw=1', lines=True)
 
 
 
 
 
54
  df.reset_index(inplace=True, drop=True)
55
  return df
56
 
@@ -63,8 +71,14 @@ with st.spinner(text="Loading data..."):
63
  def load_embeddings():
64
  #efs = [np.load(f'embeddings_{i}.pt.npy') for i in range(0,5)]
65
  #corpus_embeddings = np.concatenate(efs)
66
- response = requests.get("https://www.dropbox.com/s/px8kjdd3p5mzw6j/corpus_embeddings.pt.npy?raw=1")
67
- corpus_embeddings = np.load(io.BytesIO(response.content))
 
 
 
 
 
 
68
 
69
 
70
  return corpus_embeddings
 
13
 
14
  from nltk.tokenize import sent_tokenize
15
 
16
+ import gdown
17
+
18
+
19
  import nltk
20
  nltk.download('punkt')
21
 
 
51
 
52
  @st.cache
53
  def load_data():
54
+ #df = pd.read_json('https://www.dropbox.com/s/82lwbaym3b1o6uq/passages.jsonl?raw=1', lines=True)
55
+
56
+ url = "https://drive.google.com/uc?export=download&id=1nIBS9is8YCeiPBqA7MifVC5xeaKWH8uL"
57
+ output = "passages.jsonl"
58
+ gdown.download(url, output, quiet=False)
59
+
60
+ df = pd.read_json(output, lines=True)
61
+
62
  df.reset_index(inplace=True, drop=True)
63
  return df
64
 
 
71
  def load_embeddings():
72
  #efs = [np.load(f'embeddings_{i}.pt.npy') for i in range(0,5)]
73
  #corpus_embeddings = np.concatenate(efs)
74
+
75
+ url = "https://drive.google.com/uc?export=download&id=1z9eoBI07p_YtrdK1ZWZeCRT5T5mu5nhV"
76
+ output = "embeddings.npy"
77
+ gdown.download(url, output, quiet=False)
78
+
79
+ corpus_embeddings = np.load(output)
80
+ #response = requests.get("https://www.dropbox.com/s/px8kjdd3p5mzw6j/corpus_embeddings.pt.npy?raw=1")
81
+ #corpus_embeddings = np.load(io.BytesIO(response.content))
82
 
83
 
84
  return corpus_embeddings