NealCaren committed on
Commit
432614e
·
1 Parent(s): 6f46caa

Updated to read files from the web

Browse files
Files changed (1) hide show
  1. app.py +12 -4
app.py CHANGED
@@ -8,6 +8,9 @@ from collections import OrderedDict
8
  from sentence_transformers import SentenceTransformer, CrossEncoder, util
9
  import torch
10
 
 
 
 
11
  from nltk.tokenize import sent_tokenize
12
 
13
  import nltk
@@ -47,8 +50,9 @@ def sent_cross_load():
47
 
48
  @st.cache
49
  def load_data():
50
- dfs = [pd.read_json(f'passages_{i}.jsonl', lines=True) for i in range(0,5)]
51
- df = pd.concat(dfs)
 
52
  df.reset_index(inplace=True, drop=True)
53
  return df
54
 
@@ -59,8 +63,12 @@ with st.spinner(text="Loading data..."):
59
 
60
  @st.cache
61
  def load_embeddings():
62
- efs = [np.load(f'embeddings_{i}.pt.npy') for i in range(0,5)]
63
- corpus_embeddings = np.concatenate(efs)
 
 
 
 
64
  return corpus_embeddings
65
 
66
  with st.spinner(text="Loading embeddings..."):
 
8
  from sentence_transformers import SentenceTransformer, CrossEncoder, util
9
  import torch
10
 
11
+ import requests
12
+ import io
13
+
14
  from nltk.tokenize import sent_tokenize
15
 
16
  import nltk
 
50
 
51
  @st.cache
52
  def load_data():
53
+ #dfs = [pd.read_json(f'passages_{i}.jsonl', lines=True) for i in range(0,5)]
54
+ #df = pd.concat(dfs)
55
+ df = pd.read_json('https://www.dropbox.com/s/82lwbaym3b1o6uq/passages.jsonl?raw=1', lines=True)
56
  df.reset_index(inplace=True, drop=True)
57
  return df
58
 
 
63
 
64
  @st.cache
65
  def load_embeddings():
66
+ #efs = [np.load(f'embeddings_{i}.pt.npy') for i in range(0,5)]
67
+ #corpus_embeddings = np.concatenate(efs)
68
+ response = requests.get('"https://www.dropbox.com/s/px8kjdd3p5mzw6j/corpus_embeddings.pt.npy?raw=1"')
69
+ corpus_embeddings = np.load(io.BytesIO(response.content))
70
+
71
+
72
  return corpus_embeddings
73
 
74
  with st.spinner(text="Loading embeddings..."):