# Page/commit header residue carried over with the file (not Python code):
#   PRSHNTKUMR's picture
#   Update app.py
#   ede0e82 verified
import asyncio
import base64
from typing import List, Optional
from urllib.parse import quote

import aiohttp
import requests
import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# ASGI application object; the route decorators below register on it and the
# __main__ guard at the bottom of the file hands it to uvicorn.
app = FastAPI(
    title="Molecular Structure API",
    version="1.1.0",
    description="API for Dr. Gini - Fetch molecular structure images",
)
# ============== Models ==============
# Request body for POST /molecular-structure.
class CompoundRequest(BaseModel):
    # Compound identifier to look up; the endpoint strips surrounding
    # whitespace and rejects the request with HTTP 400 if nothing remains.
    compound_name: str
# Request body for POST /molecular-structure/batch.
class BatchCompoundRequest(BaseModel):
    # Compound identifiers to look up; the batch endpoint rejects an empty
    # list and lists with more than 10 entries (HTTP 400).
    compound_names: List[str]
# Response body of POST /molecular-structure.
#
# Every field except `success` is absent (None) in at least one code path, so
# each is annotated Optional[...] to match the values actually returned and to
# stay consistent with MolecularResult.
class MolecularResponse(BaseModel):
    # True when a structure image was retrieved.
    success: bool
    # Base64 "data:image/png;base64,..." URL of the image; None on failure.
    image_url: Optional[str] = None
    # Lookup details: "compound" and "source", plus "cid" and "inchikey" when
    # the image came from PubChem; None on failure.
    metadata: Optional[dict] = None
    # Size in bytes of the raw (pre-base64) image; None on failure.
    size: Optional[int] = None
    # Failure description; None on success.
    error: Optional[str] = None
# Per-compound entry in a batch response.
class MolecularResult(BaseModel):
    # Compound name this result belongs to (whitespace-stripped by the fetcher).
    compound: str
    # True when a structure image was retrieved for this compound.
    success: bool
    # Base64 "data:image/png;base64,..." URL of the image; None on failure.
    image_url: Optional[str] = None
    # Lookup details: "compound" and "source", plus "cid" and "inchikey" when
    # the image came from PubChem; None on failure.
    metadata: Optional[dict] = None
    # Size in bytes of the raw (pre-base64) image; None on failure.
    size: Optional[int] = None
    # Failure description; None on success.
    error: Optional[str] = None
# Response body of POST /molecular-structure/batch.
class BatchMolecularResponse(BaseModel):
    # True when at least one compound in the batch succeeded.
    success: bool
    # Number of entries in `results`.
    count: int
    # One MolecularResult per processed compound name.
    results: List[MolecularResult]
# ============== API Endpoints ==============
@app.get("/")
async def root():
    # Landing payload: names the service and points at the interactive docs.
    landing_info = {
        "message": "Molecular Structure API",
        "docs": "/docs",
    }
    return landing_info
def _lookup_structure_blocking(compound_name: str) -> MolecularResponse:
    """Resolve *compound_name* to a structure image using blocking HTTP calls.

    Lookup order:
      1. NIH resolver: compound name -> standard InChIKey.
      2. PubChem: InChIKey -> CID -> PNG image.
      3. Fallback: the NIH resolver's own image endpoint.

    This function uses ``requests`` and therefore blocks its thread; the
    endpoint runs it in a worker thread.

    Args:
        compound_name: Already-stripped, non-empty compound identifier.

    Returns:
        ``MolecularResponse`` with ``success=True`` and a base64 data URL when
        an image larger than 1000 bytes was retrieved, otherwise
        ``success=False`` with an ``error`` message. Exceptions raised during
        the lookup are reported through ``error`` instead of propagating.
    """
    # Percent-encode the name so characters such as '/', '#', '?' or spaces
    # stay inside the path segment instead of altering the request URL.
    encoded_name = quote(compound_name, safe="")
    try:
        print(f"Processing: {compound_name}")

        # Step 1: Get InChIKey from NIH
        inchikey_url = f"https://cactus.nci.nih.gov/chemical/structure/{encoded_name}/stdinchikey"
        response = requests.get(inchikey_url, timeout=10)
        if response.status_code == 200 and response.text.strip():
            inchikey = response.text.strip().replace('InChIKey=', '')
            print(f"Found InChIKey: {inchikey}")

            # Step 2: Get CID from PubChem
            cid_url = (
                "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/inchikey/"
                f"{quote(inchikey, safe='')}/cids/JSON"
            )
            cid_response = requests.get(cid_url, timeout=10)
            if cid_response.status_code == 200:
                cid_data = cid_response.json()
                cids = None
                if 'IdentifierList' in cid_data:
                    cids = cid_data['IdentifierList'].get('CID')
                # An empty CID list must fall through to the NIH fallback
                # rather than raise IndexError.
                if cids:
                    cid = cids[0]
                    print(f"Found CID: {cid}")

                    # Step 3: Get image from PubChem
                    image_url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/PNG"
                    img_response = requests.get(image_url, timeout=15)
                    # Bodies of 1000 bytes or fewer are not accepted as an image.
                    if img_response.status_code == 200 and len(img_response.content) > 1000:
                        base64_data = base64.b64encode(img_response.content).decode()
                        data_url = f"data:image/png;base64,{base64_data}"
                        return MolecularResponse(
                            success=True,
                            image_url=data_url,
                            metadata={
                                "compound": compound_name,
                                "cid": cid,
                                "inchikey": inchikey,
                                "source": "PubChem"
                            },
                            size=len(img_response.content)
                        )

        # Fallback to NIH direct image
        # NOTE(review): the data URL is always labelled image/png; confirm the
        # NIH image endpoint actually serves PNG by default.
        print("Trying NIH fallback...")
        nih_url = f"https://cactus.nci.nih.gov/chemical/structure/{encoded_name}/image"
        nih_response = requests.get(nih_url, timeout=15)
        if nih_response.status_code == 200 and len(nih_response.content) > 1000:
            base64_data = base64.b64encode(nih_response.content).decode()
            data_url = f"data:image/png;base64,{base64_data}"
            return MolecularResponse(
                success=True,
                image_url=data_url,
                metadata={
                    "compound": compound_name,
                    "source": "NIH Direct"
                },
                size=len(nih_response.content)
            )

        return MolecularResponse(
            success=False,
            error="No molecular structure found"
        )
    except Exception as e:
        # Endpoint boundary: report the failure in the response body.
        print(f"Error: {str(e)}")
        return MolecularResponse(
            success=False,
            error=str(e)
        )


@app.post("/molecular-structure", response_model=MolecularResponse)
async def get_molecular_structure(request: CompoundRequest):
    """Get molecular structure image for a chemical compound"""
    # Raises HTTPException(400) when the name is empty after stripping.
    # All other failures are returned as MolecularResponse(success=False).
    compound_name = request.compound_name.strip()
    if not compound_name:
        raise HTTPException(status_code=400, detail="Compound name is required")

    # The lookup uses blocking `requests` calls; running it in the default
    # executor keeps the event loop free to serve other requests meanwhile.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _lookup_structure_blocking, compound_name)
# ============== NEW: Batch Endpoint ==============
async def _fetch_pubchem_result_async(
    session: aiohttp.ClientSession, compound_name: str, encoded_name: str
) -> Optional[MolecularResult]:
    """Try the NIH InChIKey -> PubChem CID -> PubChem PNG chain.

    Args:
        session: Shared aiohttp session used for all requests.
        compound_name: Stripped compound name (used in logs and metadata).
        encoded_name: Percent-encoded form of ``compound_name`` for URL paths.

    Returns:
        A successful ``MolecularResult`` when PubChem yields an image larger
        than 1000 bytes, otherwise ``None`` so the caller can try its fallback.
        Network and timeout errors propagate to the caller.
    """
    # Step 1: Get InChIKey from NIH
    inchikey_url = f"https://cactus.nci.nih.gov/chemical/structure/{encoded_name}/stdinchikey"
    async with session.get(inchikey_url, timeout=aiohttp.ClientTimeout(total=10)) as response:
        if response.status != 200:
            return None
        text = await response.text()
    if not text.strip():
        return None
    inchikey = text.strip().replace('InChIKey=', '')
    print(f"[Batch] Found InChIKey for {compound_name}: {inchikey}")

    # Step 2: Get CID from PubChem
    cid_url = (
        "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/inchikey/"
        f"{quote(inchikey, safe='')}/cids/JSON"
    )
    async with session.get(cid_url, timeout=aiohttp.ClientTimeout(total=10)) as cid_response:
        if cid_response.status != 200:
            return None
        cid_data = await cid_response.json()
    cids = None
    if 'IdentifierList' in cid_data:
        cids = cid_data['IdentifierList'].get('CID')
    # An empty CID list means "not found", not an IndexError.
    if not cids:
        return None
    cid = cids[0]
    print(f"[Batch] Found CID for {compound_name}: {cid}")

    # Step 3: Get image from PubChem
    image_url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/PNG"
    async with session.get(image_url, timeout=aiohttp.ClientTimeout(total=15)) as img_response:
        if img_response.status != 200:
            return None
        content = await img_response.read()
    # Bodies of 1000 bytes or fewer are not accepted as an image.
    if len(content) <= 1000:
        return None

    base64_data = base64.b64encode(content).decode()
    return MolecularResult(
        compound=compound_name,
        success=True,
        image_url=f"data:image/png;base64,{base64_data}",
        metadata={
            "compound": compound_name,
            "cid": cid,
            "inchikey": inchikey,
            "source": "PubChem"
        },
        size=len(content)
    )


async def fetch_structure_async(session: aiohttp.ClientSession, compound_name: str) -> MolecularResult:
    """Fetch a single compound's structure image (used by the batch endpoint).

    Args:
        session: Shared aiohttp session used for all requests.
        compound_name: Compound identifier; surrounding whitespace is stripped.

    Returns:
        A ``MolecularResult`` for the compound. Failures (empty name, nothing
        found, timeout, any other exception) are reported with
        ``success=False`` and an ``error`` message; nothing is raised.
    """
    compound_name = compound_name.strip()
    if not compound_name:
        return MolecularResult(compound=compound_name, success=False, error="Empty compound name")

    # Percent-encode the name so characters such as '/', '#', '?' or spaces
    # stay inside the path segment instead of altering the request URL.
    encoded_name = quote(compound_name, safe="")
    try:
        print(f"[Batch] Processing: {compound_name}")

        pubchem_result = await _fetch_pubchem_result_async(session, compound_name, encoded_name)
        if pubchem_result is not None:
            return pubchem_result

        # Fallback to NIH direct image
        # NOTE(review): the data URL is always labelled image/png; confirm the
        # NIH image endpoint actually serves PNG by default.
        print(f"[Batch] Trying NIH fallback for {compound_name}...")
        nih_url = f"https://cactus.nci.nih.gov/chemical/structure/{encoded_name}/image"
        async with session.get(nih_url, timeout=aiohttp.ClientTimeout(total=15)) as nih_response:
            if nih_response.status == 200:
                content = await nih_response.read()
                if len(content) > 1000:
                    base64_data = base64.b64encode(content).decode()
                    return MolecularResult(
                        compound=compound_name,
                        success=True,
                        image_url=f"data:image/png;base64,{base64_data}",
                        metadata={
                            "compound": compound_name,
                            "source": "NIH Direct"
                        },
                        size=len(content)
                    )

        return MolecularResult(
            compound=compound_name,
            success=False,
            error=f"No molecular structure found for '{compound_name}'"
        )
    except asyncio.TimeoutError:
        print(f"[Batch] Timeout for {compound_name}")
        return MolecularResult(compound=compound_name, success=False, error="Request timeout")
    except Exception as e:
        # One failing compound must not abort the rest of the batch.
        print(f"[Batch] Error for {compound_name}: {str(e)}")
        return MolecularResult(compound=compound_name, success=False, error=str(e))
@app.post("/molecular-structure/batch", response_model=BatchMolecularResponse)
async def get_batch_molecular_structures(request: BatchCompoundRequest):
    """Get molecular structures for multiple compounds in parallel"""
    # Raises HTTPException(400) for an empty list or more than 10 names.
    # Per-compound failures are reported inside `results`, not raised.
    if not request.compound_names:
        raise HTTPException(status_code=400, detail="At least one compound name required")

    # Limit to 10 compounds per request
    if len(request.compound_names) > 10:
        raise HTTPException(status_code=400, detail="Maximum 10 compounds per request")

    # Remove duplicates while preserving order. Names are stripped first so
    # that entries differing only in surrounding whitespace are fetched once.
    unique_compounds = list(dict.fromkeys(name.strip() for name in request.compound_names))
    print(f"[Batch] Processing {len(unique_compounds)} compounds: {unique_compounds}")

    # One shared session for the whole batch; all lookups run concurrently.
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_structure_async(session, name) for name in unique_compounds]
        results = await asyncio.gather(*tasks)

    print(f"[Batch] Completed. Success: {sum(1 for r in results if r.success)}/{len(results)}")
    return BatchMolecularResponse(
        success=any(r.success for r in results),
        count=len(results),
        results=list(results)
    )
@app.get("/health")
async def health_check():
    # Liveness probe: always reports a fixed "healthy" status.
    status_payload = {"status": "healthy"}
    return status_payload
if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn on all interfaces, port 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )