Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +8 -5
src/streamlit_app.py
CHANGED
|
@@ -44,8 +44,11 @@ st.set_page_config(
|
|
| 44 |
# Cache the model loading
|
| 45 |
@st.cache_resource
|
| 46 |
def load_model():
|
| 47 |
-
"""Load the
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
# LIMITED CACHE: Only store 50 recent searches
|
| 51 |
@st.cache_data(ttl=3600, max_entries=50, show_spinner=False)
|
|
@@ -270,7 +273,8 @@ def main():
|
|
| 270 |
st.title("🔬 OpenAlex Semantic Search")
|
| 271 |
st.markdown("""
|
| 272 |
Search for research papers and discover top researchers using semantic similarity matching.
|
| 273 |
-
This tool
|
|
|
|
| 274 |
""")
|
| 275 |
|
| 276 |
# Sidebar configuration
|
|
@@ -537,5 +541,4 @@ def main():
|
|
| 537 |
)
|
| 538 |
|
| 539 |
if __name__ == "__main__":
|
| 540 |
-
main()
|
| 541 |
-
|
|
|
|
| 44 |
# Cache the model loading
|
| 45 |
@st.cache_resource
|
| 46 |
def load_model():
|
| 47 |
+
"""Load the SPECTER model - trained specifically on scientific papers"""
|
| 48 |
+
# SPECTER is much better for scientific content than general models
|
| 49 |
+
# Model size: ~440MB (vs ~80MB for MiniLM)
|
| 50 |
+
# Embedding size: 768 dimensions (vs 384 for MiniLM)
|
| 51 |
+
return SentenceTransformer('allenai/specter', cache_folder='/tmp/huggingface')
|
| 52 |
|
| 53 |
# LIMITED CACHE: Only store 50 recent searches
|
| 54 |
@st.cache_data(ttl=3600, max_entries=50, show_spinner=False)
|
|
|
|
| 273 |
st.title("🔬 OpenAlex Semantic Search")
|
| 274 |
st.markdown("""
|
| 275 |
Search for research papers and discover top researchers using semantic similarity matching.
|
| 276 |
+
This tool uses **SPECTER** (Scientific Paper Embeddings using Citation-informed TransformERs),
|
| 277 |
+
a model specifically trained on scientific papers for better relevance matching.
|
| 278 |
""")
|
| 279 |
|
| 280 |
# Sidebar configuration
|
|
|
|
| 541 |
)
|
| 542 |
|
| 543 |
if __name__ == "__main__":
|
| 544 |
+
main()
|
|
|