Spaces:
Running
Running
Update mtdna_classifier.py
Browse files- mtdna_classifier.py +13 -11
mtdna_classifier.py
CHANGED
|
@@ -236,17 +236,19 @@ def classify_mtDNA_sample_from_haplo(text):
|
|
| 236 |
}
|
| 237 |
# 4.3 Get from available NCBI
|
| 238 |
def infer_location_fromNCBI(accession):
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
|
|
|
|
|
|
| 250 |
|
| 251 |
# STEP 5: Main pipeline: accession -> 1. get pubmed id and isolate -> 2. get doi -> 3. get text -> 4. prediction -> 5. output: inferred location + explanation + confidence score
|
| 252 |
def classify_sample_location(accession):
|
|
|
|
| 236 |
}
|
| 237 |
# 4.3 Get from available NCBI
|
| 238 |
def infer_location_fromNCBI(accession):
    """Look up a sample's location from its NCBI nuccore record.

    Fetches the record for ``accession`` with ``Entrez.efetch`` and returns
    the value of the first ``/geo_loc_name``, ``/country`` or ``/location``
    qualifier found in the returned text.

    Args:
        accession: Identifier passed as ``id`` to ``Entrez.efetch``.

    Returns:
        The quoted qualifier value (e.g. ``"Brunei"``) when one is found,
        ``None`` when the record text contains no such qualifier, and ``""``
        when fetching or parsing raised an exception. Both non-result values
        are falsy; they are kept distinct so existing callers are unaffected.
    """
    try:
        # NOTE(review): the pattern below targets GenBank-style
        # `/key="value"` qualifiers -- confirm that rettype="medline"
        # actually yields that layout for db="nuccore".
        handle = Entrez.efetch(db="nuccore", id=accession, rettype="medline", retmode="text")
        try:
            text = handle.read()
        finally:
            # Always release the connection, even if the read fails part-way.
            handle.close()

        match = re.search(r'/(geo_loc_name|country|location)\s*=\s*"([^"]+)"', text)
        if match:
            return match.group(2)  # This is the value like "Brunei"
        return None

    except Exception as e:
        # Best-effort lookup: report the problem and let the caller fall back.
        print("❌ Entrez error:", e)
        return ""
|
| 251 |
+
|
| 252 |
|
| 253 |
# STEP 5: Main pipeline: accession -> 1. get pubmed id and isolate -> 2. get doi -> 3. get text -> 4. prediction -> 5. output: inferred location + explanation + confidence score
|
| 254 |
def classify_sample_location(accession):
|