GreenPlusbyGXS / web /backend /python /dna_analyzer.py
gaialive's picture
Upload 136 files
19bd8b9 verified
# FILE: web/backend/python/dna_analyzer.py (FINAL VERSION - Shows ID and Name)
import sys
import json
import subprocess
import os
def get_species_details(blast_id):
"""
Looks up the ugly BLAST ID and returns a rich data structure
containing the pretty name and other info.
"""
species_info_db = {
"KY045437.1": {
"species": "Salmo trutta",
"commonName": "Brown Trout",
"kingdom": "Animalia",
"phylum": "Chordata",
"ecologicalRole": "Top predator",
"conservationStatus": "Least Concern",
"indicators": ["Healthy fish population", "Good water quality"]
},
"LC143821.1": {
"species": "Escherichia coli",
"commonName": "E. coli",
"kingdom": "Bacteria",
"phylum": "Proteobacteria",
"ecologicalRole": "Decomposer",
"conservationStatus": "Pathogen Indicator",
"indicators": ["Fecal contamination", "Health risk"]
}
}
for known_id, data in species_info_db.items():
if known_id in blast_id:
# --- THIS IS THE CHANGE ---
# We add the raw blast_id to the data we return.
data['blastId'] = blast_id
return data
# If we don't find a match, we still return the ID.
return {"species": blast_id, "commonName": "Unknown Species", "blastId": blast_id}
def run_real_dna_analysis(file_path):
try:
script_dir = os.path.dirname(__file__)
blast_db_dir = os.path.join(script_dir, 'blast_db')
db_name = 'biostream_db'
output_path = os.path.join(script_dir, '..', 'temp', f"{os.path.basename(file_path)}.csv")
absolute_input_path = os.path.abspath(file_path)
blast_command = [
'blastn', '-query', absolute_input_path, '-db', db_name,
'-out', output_path, '-outfmt', "10 sseqid pident", '-subject_besthit'
]
process = subprocess.run(
blast_command, check=True, capture_output=True, text=True, cwd=blast_db_dir
)
species_hits = {}
if not os.path.exists(output_path):
return {"error": "BLAST did not produce an output file."}
with open(output_path, 'r') as f:
for line in f:
parts = line.strip().split(',')
if len(parts) < 2: continue
species_id, identity = parts[0], float(parts[1])
if species_id not in species_hits:
species_hits[species_id] = {'count': 0, 'total_identity': 0}
species_hits[species_id]['count'] += 1
species_hits[species_id]['total_identity'] += identity
if not species_hits:
return { "detectedSpecies": [], "biodiversityMetrics": { "biodiversityScore": 0, "ecosystemHealth": "No Match Found" }}
detected_species_list = []
for species_id, data in species_hits.items():
details = get_species_details(species_id)
avg_identity = data['total_identity'] / data['count']
details['confidence'] = round(avg_identity, 2)
details['abundance'] = "Medium"
details['dnaFragments'] = data['count']
detected_species_list.append(details)
os.remove(output_path)
return {
"detectedSpecies": detected_species_list,
"biodiversityMetrics": { "speciesRichness": len(detected_species_list), "biodiversityScore": 85, "ecosystemHealth": "Good"},
"waterQualityAssessment": { "overallQuality": "Good", "recommendations": ["Analysis complete."] }
}
except subprocess.CalledProcessError as e:
return {"error": f"BLAST analysis failed. Details: {e.stderr}"}
except Exception as e:
return {"error": f"An error occurred in Python: {str(e)}"}
if __name__ == "__main__":
try:
dna_file_path = sys.argv[1]
analysis_report = run_real_dna_analysis(dna_file_path)
print(json.dumps(analysis_report))
except Exception as e:
print(json.dumps({"error": str(e)}), file=sys.stderr)
sys.exit(1)