Update README.md
Browse files
README.md
CHANGED
|
@@ -8,36 +8,12 @@ datasets:
|
|
| 8 |
- CCRss/arxiv_papers_cs
|
| 9 |
---
|
| 10 |
|
| 11 |
-
|
| 12 |
-
<html>
|
| 13 |
-
<head>
|
| 14 |
-
<meta charset="UTF-8">
|
| 15 |
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 16 |
-
<meta http-equiv="X-UA-Compatible" content="ie=edge">
|
| 17 |
-
<title>Markmap</title>
|
| 18 |
-
<style>
|
| 19 |
-
* {
|
| 20 |
-
margin: 0;
|
| 21 |
-
padding: 0;
|
| 22 |
-
}
|
| 23 |
-
#mindmap {
|
| 24 |
-
display: block;
|
| 25 |
-
width: 100vw;
|
| 26 |
-
height: 100vh;
|
| 27 |
-
}
|
| 28 |
-
</style>
|
| 29 |
-
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/markmap-toolbar@0.16.0/dist/style.css">
|
| 30 |
-
</head>
|
| 31 |
-
<body>
|
| 32 |
-
<svg id="mindmap"></svg>
|
| 33 |
-
<script src="https://cdn.jsdelivr.net/npm/d3@7.8.5/dist/d3.min.js"></script><script src="https://cdn.jsdelivr.net/npm/markmap-view@0.16.0/dist/browser/index.js"></script><script src="https://cdn.jsdelivr.net/npm/markmap-toolbar@0.16.0/dist/index.js"></script><script>(()=>{setTimeout(()=>{const{markmap:H,mm:ae}=window,W=new H.Toolbar;W.attach(ae);const we=W.render();we.setAttribute("style","position:absolute;bottom:20px;right:20px"),document.body.append(we)})})()</script><script>((o,T,c,r)=>{const g=o();window.mm=g.Markmap.create("svg#mindmap",(T||g.deriveOptions)(r),c)})(()=>window.markmap,null,{"content":"Top2Vec Model for Scientific Texts","children":[{"content":"Overview","children":[{"content":"<strong>Purpose:</strong> Analyze scientific texts for topic modeling and semantic search.","children":[],"payload":{"lines":"3,4"}},{"content":"<strong>Model:</strong> Top2Vec with Universal Sentence Encoder.","children":[],"payload":{"lines":"4,5"}},{"content":"<strong>Domain:</strong> Scientific literature, particularly UAV applications in disaster and emergency situations.","children":[],"payload":{"lines":"5,7"}}],"payload":{"lines":"2,3"}},{"content":"Installation","children":[{"content":"<code>pip install top2vec</code>","children":[],"payload":{"lines":"8,9"}},{"content":"<code>pip install tensorflow==2.8.0</code>","children":[],"payload":{"lines":"9,10"}},{"content":"<code>pip install tensorflow-probability==0.16.0</code>","children":[],"payload":{"lines":"10,12"}}],"payload":{"lines":"7,8"}},{"content":"Usage","children":[{"content":"<strong>Load Documents:</strong>","children":[{"content":"Source: arXiv articles in Computer Science.","children":[],"payload":{"lines":"14,15"}},{"content":"Time Frame: 2010 to 2023.","children":[],"payload":{"lines":"15,16"}}],"payload":{"lines":"13,16"}},{"content":"<strong>Initialize Model:</strong>","children":[{"content":"<code>Top2Vec(documents=docs, embedding_model='universal-sentence-encoder')</code>","children":[],"payload":{"lines":"17,18"}}],"payload":{"lines":"16,18"}},{"content":"<strong>Model Training:</strong>","children":[{"content":"UMAP for dimensionality reduction.","children":[],"payload":{"lines":"19,20"}},{"content":"HDBSCAN for clustering.","children":[],"payload":{"lines":"20,21"}}],"payload":{"lines":"18,21"}},{"content":"<strong>Save Model:</strong>","children":[{"content":"<code>model.save('top2vec_scientific_texts_model')</code>","children":[],"payload":{"lines":"22,24"}}],"payload":{"lines":"21,24"}}],"payload":{"lines":"12,13"}},{"content":"Thematic Groups Analysis","children":[{"content":"<strong>Example Group:</strong> \"UAV in Disasters and Emergency\"","children":[{"content":"<strong>Trend Analysis:</strong> Visualize interest over time.","children":[],"payload":{"lines":"26,27"}},{"content":"<strong>Key Metrics:</strong> Number of publications, growth rate, relative growth.","children":[],"payload":{"lines":"27,29"}}],"payload":{"lines":"25,29"}}],"payload":{"lines":"24,25"}},{"content":"Use Cases","children":[{"content":"<strong>Topic Discovery:</strong> Uncover main topics within scientific texts.","children":[],"payload":{"lines":"30,31"}},{"content":"<strong>Semantic Search:</strong> Find similar documents based on semantic content.","children":[],"payload":{"lines":"31,32"}},{"content":"<strong>Trend Analysis:</strong> Study the evolution of topics over time.","children":[],"payload":{"lines":"32,34"}}],"payload":{"lines":"29,30"}},{"content":"Data","children":[{"content":"<strong>Dataset:</strong> <a href=\"https://huggingface.co/datasets/CCRss/arxiv_papers_cs\">arxiv_papers_cs</a>","children":[{"content":"Contains scientific abstracts from arXiv.","children":[],"payload":{"lines":"36,38"}}],"payload":{"lines":"35,38"}}],"payload":{"lines":"34,35"}},{"content":"Examples","children":[{"content":"<strong>Trend Analysis Graph:</strong> Visualization of interest in \"UAV in Disasters and Emergency\".","children":[],"payload":{"lines":"39,40"}},{"content":"<strong>Key Metrics Table:</strong> Summary of publications and growth metrics for the thematic group.","children":[],"payload":{"lines":"40,42"}}],"payload":{"lines":"38,39"}},{"content":"Contributions","children":[{"content":"<strong>Feedback:</strong> Suggestions and improvements are welcome.","children":[],"payload":{"lines":"43,44"}},{"content":"<strong>Issues:</strong> Please report any issues or bugs.","children":[],"payload":{"lines":"44,46"}}],"payload":{"lines":"42,43"}}],"payload":{"lines":"0,1"}},null)</script>
|
| 34 |
-
</body>
|
| 35 |
-
</html>
|
| 36 |
|
| 37 |
|
| 38 |
# Top2Vec Scientific Texts Model
|
| 39 |
|
| 40 |
-

|
| 17 |
|
| 18 |
This repository hosts the `top2vec_scientific_texts` model, a specialized Top2Vec model trained on scientific texts for topic modeling and semantic search.
|
| 19 |
|
|
|
|
| 92 |
### Key Metrics Table
|
| 93 |
|
| 94 |
Analysis for Thematic Group: Disasters & Emergency
|
| 95 |
+
| Year | Number of Publications | Growth Acceleration | Change in Number of Publications | Relative Growth |
|
| 96 |
+
|-------:|-------------------------:|----------------------:|-----------------------------------:|:------------------|
|
| 97 |
+
| 2010 | 19 | 0 | 0 | 0.0% |
|
| 98 |
+
| 2011 | 15 | -4 | -4 | -21.05% |
|
| 99 |
+
| 2012 | 28 | 17 | 13 | 86.67% |
|
| 100 |
+
| 2013 | 38 | -3 | 10 | 35.71% |
|
| 101 |
+
| 2014 | 28 | -20 | -10 | -26.32% |
|
| 102 |
+
| 2015 | 47 | 29 | 19 | 67.86% |
|
| 103 |
+
| 2016 | 63 | -3 | 16 | 34.04% |
|
| 104 |
+
| 2017 | 94 | 15 | 31 | 49.21% |
|
| 105 |
+
| 2018 | 173 | 48 | 79 | 84.04% |
|
| 106 |
+
| 2019 | 266 | 14 | 93 | 53.76% |
|
| 107 |
+
| 2020 | 337 | -22 | 71 | 26.69% |
|
| 108 |
+
| 2021 | 380 | -28 | 43 | 12.76% |
|
| 109 |
+
| 2022 | 453 | 30 | 73 | 19.21% |
|
| 110 |
+
| 2023 | 509 | -17 | 56 | 12.36% |
|
| 111 |
|
| 112 |
## Contributions
|
| 113 |
|