# Scraped page header (not part of the program): Spaces: Sleeping | File size: 4,316 Bytes | 19bd8b9
# FILE: web/backend/python/dna_analyzer.py (FINAL VERSION - Shows ID and Name)
import sys
import json
import subprocess
import os
def get_species_details(blast_id):
    """
    Translate a raw BLAST subject ID into a descriptive species record.

    A known accession only has to appear somewhere inside ``blast_id``
    (substring match), so decorated IDs still resolve. The returned dict
    always carries the raw ID under ``blastId``. When no known accession
    matches, a minimal record is returned that uses the raw ID as the
    species name and "Unknown Species" as the common name.
    """
    known_species = {
        "KY045437.1": {
            "species": "Salmo trutta",
            "commonName": "Brown Trout",
            "kingdom": "Animalia",
            "phylum": "Chordata",
            "ecologicalRole": "Top predator",
            "conservationStatus": "Least Concern",
            "indicators": ["Healthy fish population", "Good water quality"],
        },
        "LC143821.1": {
            "species": "Escherichia coli",
            "commonName": "E. coli",
            "kingdom": "Bacteria",
            "phylum": "Proteobacteria",
            "ecologicalRole": "Decomposer",
            "conservationStatus": "Pathogen Indicator",
            "indicators": ["Fecal contamination", "Health risk"],
        },
    }

    # First table entry (in insertion order) whose accession occurs in the ID.
    matched_record = next(
        (record for accession, record in known_species.items() if accession in blast_id),
        None,
    )

    if matched_record is None:
        # No match: still hand the ID back so the caller can display it.
        return {"species": blast_id, "commonName": "Unknown Species", "blastId": blast_id}

    # Attach the raw ID after the descriptive fields.
    return {**matched_record, "blastId": blast_id}
def _tally_blast_hits(csv_path):
    """Aggregate a two-column BLAST CSV (subject ID, percent identity) per ID.

    Returns a dict mapping each subject ID to
    ``{'count': <rows seen>, 'total_identity': <sum of identity values>}``.
    Lines with fewer than two comma-separated fields (e.g. blank lines) are
    skipped; a non-numeric identity value raises ``ValueError``.
    """
    hits = {}
    with open(csv_path, 'r') as f:
        for line in f:
            parts = line.strip().split(',')
            if len(parts) < 2:
                continue
            species_id, identity = parts[0], float(parts[1])
            entry = hits.setdefault(species_id, {'count': 0, 'total_identity': 0})
            entry['count'] += 1
            entry['total_identity'] += identity
    return hits


def run_real_dna_analysis(file_path):
    """Run ``blastn`` on a sequence file and summarise the hits as a report dict.

    The query is searched against the ``biostream_db`` database inside the
    ``blast_db`` directory next to this script. BLAST writes a temporary CSV
    to ``../temp/<input basename>.csv`` (relative to this script), which is
    always removed again before returning, whatever the outcome.

    Args:
        file_path: Path to the query sequence file; it is made absolute
            before being passed to ``blastn``.

    Returns:
        A JSON-serialisable dict. On success it holds ``detectedSpecies``
        (one record per distinct subject ID, enriched by
        ``get_species_details``), ``biodiversityMetrics`` and
        ``waterQualityAssessment``. When BLAST reports no hits, only
        ``detectedSpecies`` (empty) and ``biodiversityMetrics`` are present.
        On any failure the dict is ``{"error": <message>}``; exceptions are
        never propagated to the caller.
    """
    output_path = None
    try:
        # Anchor on an absolute script directory: blastn runs with
        # cwd=blast_db_dir, so a relative -out path would be resolved against
        # a different directory than the one this process checks afterwards.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        blast_db_dir = os.path.join(script_dir, 'blast_db')
        db_name = 'biostream_db'
        output_path = os.path.join(script_dir, '..', 'temp', f"{os.path.basename(file_path)}.csv")
        absolute_input_path = os.path.abspath(file_path)

        blast_command = [
            'blastn', '-query', absolute_input_path, '-db', db_name,
            '-out', output_path, '-outfmt', "10 sseqid pident", '-subject_besthit'
        ]
        # check=True turns a non-zero blastn exit into CalledProcessError,
        # whose captured stderr is reported below.
        subprocess.run(
            blast_command, check=True, capture_output=True, text=True, cwd=blast_db_dir
        )

        if not os.path.exists(output_path):
            return {"error": "BLAST did not produce an output file."}

        species_hits = _tally_blast_hits(output_path)
        if not species_hits:
            return { "detectedSpecies": [], "biodiversityMetrics": { "biodiversityScore": 0, "ecosystemHealth": "No Match Found" }}

        detected_species_list = []
        for species_id, data in species_hits.items():
            details = get_species_details(species_id)
            # Confidence is the mean identity over all rows for this subject ID.
            avg_identity = data['total_identity'] / data['count']
            details['confidence'] = round(avg_identity, 2)
            details['abundance'] = "Medium"
            details['dnaFragments'] = data['count']
            detected_species_list.append(details)

        # NOTE: score, health and quality values below are fixed constants,
        # not derived from the hits.
        return {
            "detectedSpecies": detected_species_list,
            "biodiversityMetrics": { "speciesRichness": len(detected_species_list), "biodiversityScore": 85, "ecosystemHealth": "Good"},
            "waterQualityAssessment": { "overallQuality": "Good", "recommendations": ["Analysis complete."] }
        }
    except subprocess.CalledProcessError as e:
        return {"error": f"BLAST analysis failed. Details: {e.stderr}"}
    except Exception as e:
        return {"error": f"An error occurred in Python: {str(e)}"}
    finally:
        # Best-effort cleanup on every exit path (success, no hits, failure)
        # so temporary CSV files do not accumulate.
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                pass
if __name__ == "__main__":
    # CLI entry point: takes the DNA file path as the first argument, prints
    # the analysis report as JSON on stdout. Failures at this level are
    # reported as a JSON {"error": ...} object on stderr with exit code 1.
    if len(sys.argv) < 2:
        # Report a missing argument explicitly instead of letting
        # sys.argv[1] surface as "list index out of range".
        print(json.dumps({"error": "Missing argument: path to the DNA sequence file."}), file=sys.stderr)
        sys.exit(1)
    try:
        dna_file_path = sys.argv[1]
        analysis_report = run_real_dna_analysis(dna_file_path)
        print(json.dumps(analysis_report))
    except Exception as e:
        print(json.dumps({"error": str(e)}), file=sys.stderr)
        sys.exit(1)