Spaces:

GXSNetwork
/

GreenPlusbyGXS

Running

App Files Files Community

GreenPlusbyGXS / web /backend /python /dna_analyzer.py

gaialive

Upload 136 files

19bd8b9 verified 5 months ago

raw

history blame contribute delete

4.32 kB

	# FILE: web/backend/python/dna_analyzer.py (FINAL VERSION - Shows ID and Name)

	import sys
	import json
	import subprocess
	import os

	def get_species_details(blast_id):
	    """
	    Translate a raw BLAST subject ID into a descriptive species record.

	    A record is selected when one of the known accession numbers occurs
	    anywhere inside ``blast_id`` (substring match). The raw ID is always
	    included in the result under the ``blastId`` key.

	    Args:
	        blast_id: Subject sequence ID as reported by BLAST.

	    Returns:
	        The matching species record extended with ``blastId``, or a minimal
	        record (``species``, ``commonName``, ``blastId``) when no known
	        accession is found in ``blast_id``.
	    """
	    # Accession number -> descriptive record. Rebuilt on every call, so the
	    # record handed back to the caller is never shared between calls.
	    known_species = {
	        "KY045437.1": {
	            "species": "Salmo trutta",
	            "commonName": "Brown Trout",
	            "kingdom": "Animalia",
	            "phylum": "Chordata",
	            "ecologicalRole": "Top predator",
	            "conservationStatus": "Least Concern",
	            "indicators": ["Healthy fish population", "Good water quality"],
	        },
	        "LC143821.1": {
	            "species": "Escherichia coli",
	            "commonName": "E. coli",
	            "kingdom": "Bacteria",
	            "phylum": "Proteobacteria",
	            "ecologicalRole": "Decomposer",
	            "conservationStatus": "Pathogen Indicator",
	            "indicators": ["Fecal contamination", "Health risk"],
	        },
	    }

	    # First record (in table order) whose accession appears in the BLAST ID.
	    record = next(
	        (info for accession, info in known_species.items() if accession in blast_id),
	        None,
	    )

	    if record is None:
	        # Unknown sequence: fall back to the raw ID as the species name.
	        return {"species": blast_id, "commonName": "Unknown Species", "blastId": blast_id}

	    record['blastId'] = blast_id
	    return record

	def run_real_dna_analysis(file_path):
	    """
	    Run blastn on a sequence file and summarise the hits per subject ID.

	    The query is searched against the 'biostream_db' database, with blastn
	    launched from the 'blast_db' directory located beside this script.
	    BLAST writes a temporary CSV (subject ID, percent identity) into the
	    sibling 'temp' directory; that file is removed before returning on
	    every path, including "no hits" and failures.

	    Args:
	        file_path: Path to the query sequence file. It is converted to an
	            absolute path because blastn runs with a different working
	            directory.

	    Returns:
	        A JSON-serialisable dict. On success it holds "detectedSpecies" and
	        "biodiversityMetrics" (plus "waterQualityAssessment" when at least
	        one hit was found). On failure it is {"error": "<message>"};
	        failures are reported through that key rather than raised.
	    """
	    output_path = None
	    try:
	        # Anchor every path on the absolute script location. __file__ can be
	        # relative, and blastn runs with cwd=blast_db_dir, so a relative
	        # output path would mean one place to BLAST and another to Python.
	        script_dir = os.path.dirname(os.path.abspath(__file__))
	        blast_db_dir = os.path.join(script_dir, 'blast_db')
	        db_name = 'biostream_db'
	        output_path = os.path.join(script_dir, '..', 'temp', f"{os.path.basename(file_path)}.csv")
	        absolute_input_path = os.path.abspath(file_path)

	        blast_command = [
	            'blastn', '-query', absolute_input_path, '-db', db_name,
	            '-out', output_path, '-outfmt', "10 sseqid pident", '-subject_besthit'
	        ]

	        # check=True turns a non-zero blastn exit status into
	        # CalledProcessError, handled below with blastn's stderr attached.
	        subprocess.run(
	            blast_command, check=True, capture_output=True, text=True, cwd=blast_db_dir
	        )

	        if not os.path.exists(output_path):
	            return {"error": "BLAST did not produce an output file."}

	        # subject ID -> number of hits and summed percent identity.
	        species_hits = {}
	        with open(output_path, 'r') as f:
	            for line in f:
	                parts = line.strip().split(',')
	                if len(parts) < 2:
	                    continue
	                species_id, identity = parts[0], float(parts[1])
	                hit = species_hits.setdefault(species_id, {'count': 0, 'total_identity': 0})
	                hit['count'] += 1
	                hit['total_identity'] += identity

	        if not species_hits:
	            return { "detectedSpecies": [], "biodiversityMetrics": { "biodiversityScore": 0, "ecosystemHealth": "No Match Found" }}

	        detected_species_list = []
	        for species_id, data in species_hits.items():
	            details = get_species_details(species_id)
	            avg_identity = data['total_identity'] / data['count']

	            details['confidence'] = round(avg_identity, 2)
	            details['abundance'] = "Medium"
	            details['dnaFragments'] = data['count']
	            detected_species_list.append(details)

	        return {
	            "detectedSpecies": detected_species_list,
	            "biodiversityMetrics": { "speciesRichness": len(detected_species_list), "biodiversityScore": 85, "ecosystemHealth": "Good"},
	            "waterQualityAssessment": { "overallQuality": "Good", "recommendations": ["Analysis complete."] }
	        }

	    except subprocess.CalledProcessError as e:
	        return {"error": f"BLAST analysis failed. Details: {e.stderr}"}
	    except Exception as e:
	        return {"error": f"An error occurred in Python: {str(e)}"}
	    finally:
	        # Best-effort cleanup of the temporary CSV on every exit path. The
	        # file may not exist (e.g. blastn never started), which is fine.
	        if output_path is not None:
	            try:
	                os.remove(output_path)
	            except OSError:
	                pass

	if __name__ == "__main__":
	    # Script entry point: analyse the file named by the first command-line
	    # argument and print the report as a single JSON document on stdout.
	    try:
	        print(json.dumps(run_real_dna_analysis(sys.argv[1])))
	    except Exception as exc:
	        # Anything unexpected (including a missing argument) is reported as
	        # JSON on stderr, and the process exits with status 1.
	        sys.stderr.write(json.dumps({"error": str(exc)}) + "\n")
	        sys.exit(1)