File size: 10,518 Bytes
237f2c8
 
ede0e82
f67b4aa
 
ede0e82
 
237f2c8
f67b4aa
ede0e82
 
 
237f2c8
 
 
 
ede0e82
 
 
237f2c8
 
 
 
 
 
 
ede0e82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237f2c8
 
 
 
ede0e82
237f2c8
 
 
 
 
 
 
d49061a
 
8f8e3c7
d49061a
8f8e3c7
237f2c8
d49061a
 
 
 
8f8e3c7
d49061a
8f8e3c7
d49061a
 
 
 
 
 
 
8f8e3c7
d49061a
8f8e3c7
d49061a
 
 
 
 
 
 
237f2c8
 
 
 
d49061a
 
 
 
 
237f2c8
 
d49061a
8f8e3c7
 
237f2c8
d49061a
 
 
 
 
 
237f2c8
 
 
 
d49061a
 
 
237f2c8
 
d49061a
237f2c8
 
 
 
45aa19b
f67b4aa
8f8e3c7
237f2c8
 
 
 
f67b4aa
ede0e82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237f2c8
 
 
f67b4aa
ede0e82
f67b4aa
237f2c8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Optional
import requests
import base64
import asyncio
import aiohttp
import uvicorn

# Application object that every route below registers itself on.
app = FastAPI(
    title="Molecular Structure API",
    version="1.1.0",
    description="API for Dr. Gini - Fetch molecular structure images",
)

# ============== Models ==============

# Request body for POST /molecular-structure.
class CompoundRequest(BaseModel):
    # Identifier of the compound to look up; the handler strips surrounding
    # whitespace and answers 400 when nothing is left.
    compound_name: str

# Request body for POST /molecular-structure/batch.
class BatchCompoundRequest(BaseModel):
    # Compounds to look up in one call; the batch handler rejects an empty
    # list and lists with more than 10 entries (HTTP 400 in both cases).
    compound_names: List[str]

# Response body for POST /molecular-structure.
#
# Only `success` is always populated. The image fields are filled in on a hit
# and `error` on a miss, so every other field is declared Optional: that keeps
# the annotations truthful (the defaults are None), matches MolecularResult,
# and lets callers pass None explicitly without a validation error.
class MolecularResponse(BaseModel):
    # True when an image was retrieved.
    success: bool
    # Base64 `data:` URL holding the image bytes.
    image_url: Optional[str] = None
    # Lookup details: "compound" and "source", plus "cid" and "inchikey" when
    # the image came from PubChem.
    metadata: Optional[dict] = None
    # Length in bytes of the raw (pre-base64) image.
    size: Optional[int] = None
    # Human-readable failure reason when `success` is False.
    error: Optional[str] = None

# Outcome for a single compound inside a batch response.
class MolecularResult(BaseModel):
    # Compound name as used for the lookup (whitespace-stripped).
    compound: str
    # True when an image was retrieved for this compound.
    success: bool
    # Base64 `data:` URL holding the image bytes; None on failure.
    image_url: Optional[str] = None
    # Lookup details: "compound" and "source", plus "cid" and "inchikey" when
    # the image came from PubChem; None on failure.
    metadata: Optional[dict] = None
    # Length in bytes of the raw (pre-base64) image; None on failure.
    size: Optional[int] = None
    # Failure reason (not found, timeout, or exception text); None on success.
    error: Optional[str] = None

# Response body for POST /molecular-structure/batch.
class BatchMolecularResponse(BaseModel):
    # True when at least one compound in the batch succeeded.
    success: bool
    # Number of entries in `results`.
    count: int
    # One entry per looked-up compound.
    results: List[MolecularResult]


# ============== API Endpoints ==============

@app.get("/")
async def root():
    # Landing payload: names the service and points at the generated docs.
    landing = {}
    landing["message"] = "Molecular Structure API"
    landing["docs"] = "/docs"
    return landing


@app.post("/molecular-structure", response_model=MolecularResponse)
async def get_molecular_structure(request: CompoundRequest):
    """Get molecular structure image for a chemical compound"""
    # Lookup order:
    #   1. NIH CACTUS resolves the name to a standard InChIKey.
    #   2. PubChem maps the InChIKey to a CID.
    #   3. PubChem renders the CID as a PNG.
    # If any of those steps yields nothing, the NIH CACTUS image endpoint is
    # tried directly. Responses of 1000 bytes or fewer are treated as
    # "no image". Any exception is reported in the `error` field instead of
    # being raised; only a blank name produces an HTTP 400.
    from urllib.parse import quote

    compound_name = request.compound_name.strip()
    if not compound_name:
        raise HTTPException(status_code=400, detail="Compound name is required")

    # The name goes into a URL path segment, so percent-encode everything:
    # characters such as '#', '?', '/' or spaces would otherwise truncate or
    # redirect the request.
    encoded_name = quote(compound_name, safe="")
    loop = asyncio.get_running_loop()

    async def http_get(url, timeout):
        # `requests` is synchronous; run it in the default executor so this
        # coroutine does not block the event loop while waiting on upstream.
        return await loop.run_in_executor(
            None, lambda: requests.get(url, timeout=timeout)
        )

    def as_data_url(resp):
        # Label the payload with the image type the server actually declared
        # and fall back to PNG when no image type is given.
        # NOTE(review): CACTUS appears to serve GIF by default - confirm.
        mime = resp.headers.get("Content-Type", "").split(";")[0].strip().lower()
        if not mime.startswith("image/"):
            mime = "image/png"
        encoded = base64.b64encode(resp.content).decode()
        return f"data:{mime};base64,{encoded}"

    async def pubchem_lookup():
        # Returns a successful MolecularResponse, or None when any step of
        # the InChIKey -> CID -> PNG chain produces no usable result.

        # Step 1: Get InChIKey from NIH
        inchikey_url = f"https://cactus.nci.nih.gov/chemical/structure/{encoded_name}/stdinchikey"
        response = await http_get(inchikey_url, 10)
        if response.status_code != 200 or not response.text.strip():
            return None
        inchikey = response.text.strip().replace('InChIKey=', '')
        print(f"Found InChIKey: {inchikey}")

        # Step 2: Get CID from PubChem
        cid_url = (
            "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/inchikey/"
            f"{quote(inchikey, safe='')}/cids/JSON"
        )
        cid_response = await http_get(cid_url, 10)
        if cid_response.status_code != 200:
            return None
        # An absent or empty CID list means "not in PubChem"; move on to the
        # fallback rather than failing on cids[0].
        cids = cid_response.json().get('IdentifierList', {}).get('CID') or []
        if not cids:
            return None
        cid = cids[0]
        print(f"Found CID: {cid}")

        # Step 3: Get image from PubChem
        image_url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/PNG"
        img_response = await http_get(image_url, 15)
        if img_response.status_code != 200 or len(img_response.content) <= 1000:
            return None

        return MolecularResponse(
            success=True,
            image_url=as_data_url(img_response),
            metadata={
                "compound": compound_name,
                "cid": cid,
                "inchikey": inchikey,
                "source": "PubChem"
            },
            size=len(img_response.content)
        )

    try:
        print(f"Processing: {compound_name}")

        found = await pubchem_lookup()
        if found is not None:
            return found

        # Fallback to NIH direct image
        print("Trying NIH fallback...")
        nih_url = f"https://cactus.nci.nih.gov/chemical/structure/{encoded_name}/image"
        nih_response = await http_get(nih_url, 15)

        if nih_response.status_code == 200 and len(nih_response.content) > 1000:
            return MolecularResponse(
                success=True,
                image_url=as_data_url(nih_response),
                metadata={
                    "compound": compound_name,
                    "source": "NIH Direct"
                },
                size=len(nih_response.content)
            )

        return MolecularResponse(
            success=False,
            error="No molecular structure found"
        )

    except Exception as e:
        # Endpoint boundary: report upstream/network failures in the body.
        print(f"Error: {str(e)}")
        return MolecularResponse(
            success=False,
            error=str(e)
        )


# ============== NEW: Batch Endpoint ==============

async def fetch_structure_async(session: aiohttp.ClientSession, compound_name: str) -> MolecularResult:
    """Fetch one compound's structure image (used by the batch endpoint).

    Resolves the name to an InChIKey via NIH CACTUS, maps it to a PubChem CID
    and downloads the PubChem PNG. When any of those steps yields nothing, the
    NIH CACTUS image endpoint is tried directly. Payloads of 1000 bytes or
    fewer are treated as "no image".

    Args:
        session: Shared aiohttp session used for every upstream request.
        compound_name: Compound identifier; surrounding whitespace is ignored.

    Returns:
        A MolecularResult. Failures (blank name, nothing found, timeout, or
        any other exception) are reported through ``success=False`` and
        ``error``; this coroutine does not raise for them.
    """
    from urllib.parse import quote

    compound_name = compound_name.strip()

    if not compound_name:
        return MolecularResult(compound=compound_name, success=False, error="Empty compound name")

    # The name goes into a URL path segment, so percent-encode everything:
    # characters such as '#', '?', '/' or spaces would otherwise truncate or
    # redirect the request.
    encoded_name = quote(compound_name, safe="")

    def image_result(content, content_type, metadata):
        # Label the payload with the image type the server actually declared
        # and fall back to PNG when no image type is given.
        # NOTE(review): CACTUS appears to serve GIF by default - confirm.
        mime = (content_type or "").split(";")[0].strip().lower()
        if not mime.startswith("image/"):
            mime = "image/png"
        base64_data = base64.b64encode(content).decode()
        return MolecularResult(
            compound=compound_name,
            success=True,
            image_url=f"data:{mime};base64,{base64_data}",
            metadata=metadata,
            size=len(content)
        )

    async def pubchem_lookup():
        # Returns a successful MolecularResult, or None when any step of the
        # InChIKey -> CID -> PNG chain produces no usable result. Each
        # response is released before the next request is issued.

        # Step 1: Get InChIKey from NIH
        inchikey_url = f"https://cactus.nci.nih.gov/chemical/structure/{encoded_name}/stdinchikey"
        async with session.get(inchikey_url, timeout=aiohttp.ClientTimeout(total=10)) as response:
            if response.status != 200:
                return None
            text = (await response.text()).strip()
        if not text:
            return None
        inchikey = text.replace('InChIKey=', '')
        print(f"[Batch] Found InChIKey for {compound_name}: {inchikey}")

        # Step 2: Get CID from PubChem
        cid_url = (
            "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/inchikey/"
            f"{quote(inchikey, safe='')}/cids/JSON"
        )
        async with session.get(cid_url, timeout=aiohttp.ClientTimeout(total=10)) as cid_response:
            if cid_response.status != 200:
                return None
            cid_data = await cid_response.json()
        # An absent or empty CID list means "not in PubChem"; move on to the
        # fallback rather than failing on cids[0].
        cids = cid_data.get('IdentifierList', {}).get('CID') or []
        if not cids:
            return None
        cid = cids[0]
        print(f"[Batch] Found CID for {compound_name}: {cid}")

        # Step 3: Get image from PubChem
        image_url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/PNG"
        async with session.get(image_url, timeout=aiohttp.ClientTimeout(total=15)) as img_response:
            if img_response.status != 200:
                return None
            content = await img_response.read()
            content_type = img_response.headers.get("Content-Type", "")
        if len(content) <= 1000:
            return None

        return image_result(
            content,
            content_type,
            {
                "compound": compound_name,
                "cid": cid,
                "inchikey": inchikey,
                "source": "PubChem"
            },
        )

    try:
        print(f"[Batch] Processing: {compound_name}")

        found = await pubchem_lookup()
        if found is not None:
            return found

        # Fallback to NIH direct image
        print(f"[Batch] Trying NIH fallback for {compound_name}...")
        nih_url = f"https://cactus.nci.nih.gov/chemical/structure/{encoded_name}/image"
        async with session.get(nih_url, timeout=aiohttp.ClientTimeout(total=15)) as nih_response:
            if nih_response.status == 200:
                content = await nih_response.read()
                if len(content) > 1000:
                    return image_result(
                        content,
                        nih_response.headers.get("Content-Type", ""),
                        {
                            "compound": compound_name,
                            "source": "NIH Direct"
                        },
                    )

        return MolecularResult(
            compound=compound_name,
            success=False,
            error=f"No molecular structure found for '{compound_name}'"
        )

    except asyncio.TimeoutError:
        print(f"[Batch] Timeout for {compound_name}")
        return MolecularResult(compound=compound_name, success=False, error="Request timeout")
    except Exception as e:
        # One failing compound must not abort the rest of the batch.
        print(f"[Batch] Error for {compound_name}: {str(e)}")
        return MolecularResult(compound=compound_name, success=False, error=str(e))


@app.post("/molecular-structure/batch", response_model=BatchMolecularResponse)
async def get_batch_molecular_structures(request: BatchCompoundRequest):
    """Get molecular structures for multiple compounds in parallel"""
    # Answers HTTP 400 for an empty list or more than 10 names. Otherwise
    # every distinct name is looked up concurrently over one shared aiohttp
    # session, and the results come back in first-occurrence order.

    if not request.compound_names:
        raise HTTPException(status_code=400, detail="At least one compound name required")

    # Limit to 10 compounds per request
    if len(request.compound_names) > 10:
        raise HTTPException(status_code=400, detail="Maximum 10 compounds per request")

    # Remove duplicates while preserving order. Names are stripped first
    # because the per-compound lookup strips them anyway: without this,
    # "aspirin" and " aspirin " would be fetched twice and returned as two
    # results carrying the same `compound` value.
    unique_compounds = list(dict.fromkeys(name.strip() for name in request.compound_names))
    print(f"[Batch] Processing {len(unique_compounds)} compounds: {unique_compounds}")

    async with aiohttp.ClientSession() as session:
        tasks = [fetch_structure_async(session, name) for name in unique_compounds]
        # fetch_structure_async reports failures in its return value, so one
        # bad compound does not cancel the others.
        results = await asyncio.gather(*tasks)

    print(f"[Batch] Completed. Success: {sum(1 for r in results if r.success)}/{len(results)}")

    return BatchMolecularResponse(
        success=any(r.success for r in results),
        count=len(results),
        results=list(results)
    )


@app.get("/health")
async def health_check():
    # Liveness probe: always reports a fixed status and checks nothing else.
    report = dict(status="healthy")
    return report


if __name__ == "__main__":
    # Direct execution: serve the app on all interfaces, port 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )