INLEXIO committed on
Commit
de16096
·
verified ·
1 Parent(s): 3885666

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +8 -5
src/streamlit_app.py CHANGED
@@ -44,8 +44,11 @@ st.set_page_config(
44
  # Cache the model loading
45
  @st.cache_resource
46
  def load_model():
47
- """Load the sentence transformer model"""
48
- return SentenceTransformer('all-MiniLM-L6-v2', cache_folder='/tmp/huggingface')
 
 
 
49
 
50
  # LIMITED CACHE: Only store 50 recent searches
51
  @st.cache_data(ttl=3600, max_entries=50, show_spinner=False)
@@ -270,7 +273,8 @@ def main():
270
  st.title("🔬 OpenAlex Semantic Search")
271
  st.markdown("""
272
  Search for research papers and discover top researchers using semantic similarity matching.
273
- This tool searches the OpenAlex database and ranks results by relevance, not just citations.
 
274
  """)
275
 
276
  # Sidebar configuration
@@ -537,5 +541,4 @@ def main():
537
  )
538
 
539
  if __name__ == "__main__":
540
- main()
541
-
 
44
  # Cache the model loading
45
  @st.cache_resource
46
  def load_model():
47
+ """Load the SPECTER model - trained specifically on scientific papers"""
48
+ # SPECTER is much better for scientific content than general models
49
+ # Model size: ~440MB (vs ~80MB for MiniLM)
50
+ # Embedding size: 768 dimensions (vs 384 for MiniLM)
51
+ return SentenceTransformer('allenai/specter', cache_folder='/tmp/huggingface')
52
 
53
  # LIMITED CACHE: Only store 50 recent searches
54
  @st.cache_data(ttl=3600, max_entries=50, show_spinner=False)
 
273
  st.title("🔬 OpenAlex Semantic Search")
274
  st.markdown("""
275
  Search for research papers and discover top researchers using semantic similarity matching.
276
+ This tool uses **SPECTER** (Scientific Paper Embeddings using Citation-informed TransformERs),
277
+ a model specifically trained on scientific papers for better relevance matching.
278
  """)
279
 
280
  # Sidebar configuration
 
541
  )
542
 
543
  if __name__ == "__main__":
544
+ main()