Spaces:

INLEXIO
/

semantic-search

Sleeping

INLEXIO committed on Oct 24, 2025

Commit

de16096

verified ·

1 Parent(s): 3885666

Update src/streamlit_app.py

Files changed (1) hide show

src/streamlit_app.py CHANGED Viewed

@@ -44,8 +44,11 @@ st.set_page_config(
 # Cache the model loading
 @st.cache_resource
 def load_model():
-    """Load the sentence transformer model"""
-    return SentenceTransformer('all-MiniLM-L6-v2', cache_folder='/tmp/huggingface')
 # LIMITED CACHE: Only store 50 recent searches
 @st.cache_data(ttl=3600, max_entries=50, show_spinner=False)
@@ -270,7 +273,8 @@ def main():
     st.title("🔬 OpenAlex Semantic Search")
     st.markdown("""
     Search for research papers and discover top researchers using semantic similarity matching.
-    This tool searches the OpenAlex database and ranks results by relevance, not just citations.
     """)
     # Sidebar configuration
@@ -537,5 +541,4 @@ def main():
         )
 if __name__ == "__main__":
-    main()

 # Cache the model loading
 @st.cache_resource
 def load_model():
+    """Load the SPECTER model - trained specifically on scientific papers"""
+    # SPECTER is much better for scientific content than general models
+    # Model size: ~440MB (vs ~80MB for MiniLM)
+    # Embedding size: 768 dimensions (vs 384 for MiniLM)
+    return SentenceTransformer('allenai/specter', cache_folder='/tmp/huggingface')
 # LIMITED CACHE: Only store 50 recent searches
 @st.cache_data(ttl=3600, max_entries=50, show_spinner=False)
     st.title("🔬 OpenAlex Semantic Search")
     st.markdown("""
     Search for research papers and discover top researchers using semantic similarity matching.
+    This tool uses **SPECTER** (Scientific Paper Embeddings using Citation-informed TransformERs),
+    a model specifically trained on scientific papers for better relevance matching.
     """)
     # Sidebar configuration
         )
 if __name__ == "__main__":
+    main()