File size: 4,316 Bytes
19bd8b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# FILE: web/backend/python/dna_analyzer.py (FINAL VERSION - Shows ID and Name)

import sys
import json
import subprocess
import os

def get_species_details(blast_id):
    """
    Resolve a raw BLAST subject ID to a species record.

    A record from the built-in table is selected when its accession
    string occurs anywhere inside ``blast_id``; the raw ID is then stored
    on that record under ``blastId`` and the record is returned. When no
    accession matches, a minimal placeholder record is returned that uses
    the raw ID as the species name.
    """
    brown_trout = {
        "species": "Salmo trutta",
        "commonName": "Brown Trout",
        "kingdom": "Animalia",
        "phylum": "Chordata",
        "ecologicalRole": "Top predator",
        "conservationStatus": "Least Concern",
        "indicators": ["Healthy fish population", "Good water quality"]
    }
    e_coli = {
        "species": "Escherichia coli",
        "commonName": "E. coli",
        "kingdom": "Bacteria",
        "phylum": "Proteobacteria",
        "ecologicalRole": "Decomposer",
        "conservationStatus": "Pathogen Indicator",
        "indicators": ["Fecal contamination", "Health risk"]
    }
    catalogue = {"KY045437.1": brown_trout, "LC143821.1": e_coli}

    # First catalogue entry whose accession is contained in the raw ID.
    record = next(
        (entry for accession, entry in catalogue.items() if accession in blast_id),
        None,
    )

    # Unknown IDs still carry the raw ID so the caller can display it.
    if record is None:
        return {"species": blast_id, "commonName": "Unknown Species", "blastId": blast_id}

    record['blastId'] = blast_id
    return record

def run_real_dna_analysis(file_path):
    """
    Run ``blastn`` on a query file and summarise the hits per subject ID.

    The query is searched against the ``biostream_db`` database located in
    the ``blast_db`` directory next to this script. BLAST writes a CSV of
    ``sseqid,pident`` rows to a temporary file under ``../temp``; the rows
    are grouped by subject ID, and each group is turned into a species
    record via ``get_species_details`` with the mean identity as
    ``confidence`` and the row count as ``dnaFragments``.

    Args:
        file_path: Path to the query sequence file passed to ``blastn``.

    Returns:
        A dict. On success it has ``detectedSpecies``,
        ``biodiversityMetrics`` and ``waterQualityAssessment`` (the score
        and quality labels are fixed values, not computed). When BLAST
        yields no rows, only an empty ``detectedSpecies`` list and a
        "No Match Found" metrics entry are returned. On any failure the
        dict contains a single ``error`` message instead of raising.
    """
    output_path = None
    try:
        # Anchor every path on the absolute script location: blastn runs
        # with cwd=blast_db_dir, so a relative output path would point to a
        # different file for blastn than for this process.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        blast_db_dir = os.path.join(script_dir, 'blast_db')
        db_name = 'biostream_db'
        output_path = os.path.join(script_dir, '..', 'temp', f"{os.path.basename(file_path)}.csv")
        absolute_input_path = os.path.abspath(file_path)

        blast_command = [
            'blastn', '-query', absolute_input_path, '-db', db_name,
            '-out', output_path, '-outfmt', "10 sseqid pident", '-subject_besthit'
        ]

        # check=True turns a non-zero blastn exit into CalledProcessError,
        # whose captured stderr is reported below.
        subprocess.run(
            blast_command, check=True, capture_output=True, text=True, cwd=blast_db_dir
        )

        if not os.path.exists(output_path):
            return {"error": "BLAST did not produce an output file."}

        # Accumulate hit count and summed percent identity per subject ID.
        species_hits = {}
        with open(output_path, 'r') as f:
            for line in f:
                parts = line.strip().split(',')
                if len(parts) < 2:
                    continue
                species_id, identity = parts[0], float(parts[1])
                if species_id not in species_hits:
                    species_hits[species_id] = {'count': 0, 'total_identity': 0}
                species_hits[species_id]['count'] += 1
                species_hits[species_id]['total_identity'] += identity

        if not species_hits:
            return { "detectedSpecies": [], "biodiversityMetrics": { "biodiversityScore": 0, "ecosystemHealth": "No Match Found" }}

        detected_species_list = []
        for species_id, data in species_hits.items():
            details = get_species_details(species_id)
            avg_identity = data['total_identity'] / data['count']

            details['confidence'] = round(avg_identity, 2)
            details['abundance'] = "Medium"
            details['dnaFragments'] = data['count']
            detected_species_list.append(details)

        return {
            "detectedSpecies": detected_species_list,
            "biodiversityMetrics": { "speciesRichness": len(detected_species_list), "biodiversityScore": 85, "ecosystemHealth": "Good"},
            "waterQualityAssessment": { "overallQuality": "Good", "recommendations": ["Analysis complete."] }
        }

    except subprocess.CalledProcessError as e:
        return {"error": f"BLAST analysis failed. Details: {e.stderr}"}
    except Exception as e:
        return {"error": f"An error occurred in Python: {str(e)}"}
    finally:
        # Remove the temporary CSV on every exit path (success, no hits,
        # or failure) so result files do not pile up in the temp directory.
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                # Best-effort cleanup: the file may never have been created.
                pass

if __name__ == "__main__":
    # CLI entry point: takes the query file path as the first argument and
    # prints the analysis report as JSON on stdout. Failures are reported as
    # a JSON {"error": ...} object on stderr with exit status 1.
    if len(sys.argv) < 2:
        # Report a missing argument explicitly instead of surfacing the
        # IndexError text "list index out of range".
        print(
            json.dumps({"error": "Missing argument: path to the DNA sequence file."}),
            file=sys.stderr,
        )
        sys.exit(1)

    try:
        dna_file_path = sys.argv[1]
        analysis_report = run_real_dna_analysis(dna_file_path)
        print(json.dumps(analysis_report))
    except Exception as e:
        print(json.dumps({"error": str(e)}), file=sys.stderr)
        sys.exit(1)