Spaces:
Sleeping
Sleeping
Commit
·
7833461
1
Parent(s):
f26b169
augment from similar
Browse files
utils.py
CHANGED
|
@@ -123,6 +123,28 @@ def get_similarities_among_diseases_uris(
|
|
| 123 |
return data
|
| 124 |
|
| 125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
def get_embedding(string: str, encoder) -> List[float]:
|
| 127 |
# Embed the string using sentence-transformers
|
| 128 |
vector = encoder.encode(string, show_progress_bar=False)
|
|
|
|
| 123 |
return data
|
| 124 |
|
| 125 |
|
| 126 |
+
def augment_the_set_of_diseaces(engine, diseases: List[str], target_size: int = 15) -> List[str]:
    """Pad ``diseases`` with the most similar diseases until it has ``target_size`` entries.

    On each round the database is asked for the single entity (not already in
    the set, and whose label is not ``'nan'``) with the highest summed cosine
    similarity to the current set, divided by the size of the set. The last
    path segment of that entity's URI is appended and the next round runs
    against the enlarged set, so every pick influences the following ones.

    Args:
        engine: Object exposing ``connect()``; the returned connection must
            provide ``begin()`` and ``execute()`` (presumably a SQLAlchemy
            engine -- confirm against the caller).
        diseases: Disease identifiers; each is prefixed with
            ``http://identifiers.org/medgen/`` to form the URI that is looked
            up. The list is extended IN PLACE.
        target_size: Number of entries to grow the list to. Defaults to 15,
            the value that used to be hard-coded.

    Returns:
        The same ``diseases`` list object, after augmentation. It is returned
        unchanged when it is empty (there is nothing to compare against) or
        already holds at least ``target_size`` entries, and it may be shorter
        than ``target_size`` if the database runs out of candidates.
    """
    missing = target_size - len(diseases)
    # An empty seed set would yield "IN ()" and a division by zero in the SQL,
    # so bail out before touching the database at all.
    if missing <= 0 or not diseases:
        return diseases

    uri_prefix = "http://identifiers.org/medgen/"

    # One connection/transaction serves every round instead of reconnecting
    # on each iteration.
    with engine.connect() as conn, conn.begin():
        for _ in range(missing):
            # Bound parameters keep identifiers out of the SQL text, so a
            # quote inside an identifier can neither break nor alter the query.
            params = {
                f"d{idx}": f"{uri_prefix}{disease}"
                for idx, disease in enumerate(diseases)
            }
            placeholders = ", ".join(f":{key}" for key in params)

            # NOTE(review): the query groups by e2.label but selects e2.uri;
            # if several URIs share a label their scores are summed together.
            # Left as-is -- confirm whether GROUP BY e2.uri was intended.
            sql = f"""
                SELECT TOP 1 e2.uri AS new_disease, (SUM(VECTOR_COSINE(e1.embedding, e2.embedding)) / {len(diseases)}) AS score
                FROM Test.EntityEmbeddings e1, Test.EntityEmbeddings e2
                WHERE e1.uri IN ({placeholders})
                AND e2.uri NOT IN ({placeholders})
                AND e2.label != 'nan'
                GROUP BY e2.label
                ORDER BY score DESC
            """

            rows = conn.execute(text(sql), params).fetchall()
            if not rows:
                # No candidate left; further rounds would return nothing too.
                break

            # Keep only the trailing identifier of the URI.
            diseases.append(rows[0][0].split('/')[-1])

    return diseases
|
| 147 |
+
|
| 148 |
def get_embedding(string: str, encoder) -> List[float]:
|
| 149 |
# Embed the string using sentence-transformers
|
| 150 |
vector = encoder.encode(string, show_progress_bar=False)
|