Spaces:
Sleeping
Sleeping
Update search_utils.py
Browse files - search_utils.py +1 -12
search_utils.py
CHANGED
|
@@ -278,17 +278,6 @@ class MetadataManager:
|
|
| 278 |
return data["data"][0].get("url", "")
|
| 279 |
return ""
|
| 280 |
|
| 281 |
-
def _format_source_links(self, links):
|
| 282 |
-
"""Generate an HTML snippet for the available source links."""
|
| 283 |
-
html_parts = []
|
| 284 |
-
if "arxiv" in links:
|
| 285 |
-
html_parts.append(f"<a class='source-link' href='{links['arxiv']}' target='_blank' rel='noopener noreferrer'> π arXiv</a>")
|
| 286 |
-
if "semantic" in links:
|
| 287 |
-
html_parts.append(f"<a class='source-link' href='{links['semantic']}' target='_blank' rel='noopener noreferrer'> π Semantic Scholar</a>")
|
| 288 |
-
if "google" in links:
|
| 289 |
-
html_parts.append(f"<a class='source-link' href='{links['google']}' target='_blank' rel='noopener noreferrer'> π Google Scholar</a>")
|
| 290 |
-
return " | ".join(html_parts)
|
| 291 |
-
|
| 292 |
|
| 293 |
class SemanticSearch:
|
| 294 |
def __init__(self):
|
|
@@ -441,7 +430,7 @@ class SemanticSearch:
|
|
| 441 |
f"max={results['similarity'].max():.3f}, " +
|
| 442 |
f"mean={results['similarity'].mean():.3f}")
|
| 443 |
results['source'] = results['title'].apply(
|
| 444 |
-
lambda title: self._format_source_links(self.metadata_mgr.
|
| 445 |
)
|
| 446 |
pre_dedup = len(results)
|
| 447 |
results = results.drop_duplicates(subset=["title", "source"]).sort_values("similarity", ascending=False).head(top_k)
|
|
|
|
| 278 |
return data["data"][0].get("url", "")
|
| 279 |
return ""
|
| 280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
|
| 282 |
class SemanticSearch:
|
| 283 |
def __init__(self):
|
|
|
|
| 430 |
f"max={results['similarity'].max():.3f}, " +
|
| 431 |
f"mean={results['similarity'].mean():.3f}")
|
| 432 |
results['source'] = results['title'].apply(
|
| 433 |
+
lambda title: self._format_source_links(self.metadata_mgr.resolve_url(title))
|
| 434 |
)
|
| 435 |
pre_dedup = len(results)
|
| 436 |
results = results.drop_duplicates(subset=["title", "source"]).sort_values("similarity", ascending=False).head(top_k)
|