Spaces:
Sleeping
Sleeping
| """ | |
| Drug Database Utility Functions | |
| Handles searching and extracting drug information from the database | |
| """ | |
| import pandas as pd | |
| import ast | |
| from typing import Optional, List, Tuple | |
def _first_nonblank_cell(row: pd.Series, columns: List[str]) -> Optional[str]:
    """Return the stripped text of the first listed column that holds a non-blank value."""
    for col in columns:
        if col in row.index and pd.notna(row[col]):
            text = str(row[col]).strip()
            if text:
                return text
    return None


def _parse_target_list(raw: str) -> List[str]:
    """Convert a raw target cell (list literal, comma-separated, or single name) to a list."""
    if raw.startswith('[') and raw.endswith(']'):
        try:
            parsed = ast.literal_eval(raw)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Not a valid Python literal: keep the raw text as a single target
            return [raw]
        return [str(item) for item in parsed]
    if ',' in raw:
        # Comma-separated targets; drop empty pieces produced by stray commas
        return [part.strip() for part in raw.split(',') if part.strip()]
    return [raw]


def find_drug_in_database(drug_name: str, drugs_df: pd.DataFrame) -> Tuple[Optional[str], Optional[List[str]]]:
    """
    Extract SMILES and targets for a drug by EXACT name match first, then partial.

    Matching is case-insensitive. The partial match is a literal substring
    search (not a regular expression). When several rows match, the first
    one is used.

    Args:
        drug_name: Name of the drug to search for
        drugs_df: DataFrame containing drug information with columns:
            - DrugName: name of the drug
            - DrugSmile: SMILES structure
            - DrugTarget: protein targets (string representation of a list,
              or a comma-separated string)

    Returns:
        Tuple of (smiles, targets):
            - (None, None) if drugs_df is None, the name is blank, or no
              row matches
            - otherwise smiles is the stripped SMILES string (None when no
              SMILES column holds a value) and targets is a list of target
              names (empty list when none are recorded)

    Examples:
        >>> smiles, targets = find_drug_in_database("Naltrexone", drugs_df)
        >>> print(f"SMILES: {smiles}")
        >>> print(f"Targets: {targets}")
    """
    if drugs_df is None or not drug_name:
        return None, None

    # A blank query would substring-match every row, so treat it as "not found"
    query = str(drug_name).strip().lower()
    if not query:
        return None, None

    names = drugs_df['DrugName'].str.lower()

    # Try EXACT match first (case-insensitive)
    matches = drugs_df[names == query]

    # If no exact match, try partial match. regex=False because drug names
    # often contain regex metacharacters such as "(", "+" or "."
    if matches.empty:
        matches = drugs_df[names.str.contains(query, na=False, regex=False)]
    if matches.empty:
        return None, None

    # Get the first match
    row = matches.iloc[0]

    # Extract SMILES - try multiple possible column names
    smiles = _first_nonblank_cell(
        row, ['DrugSmile', 'SMILES', 'Smiles', 'smiles', 'DrugSMILES', 'Structure'])

    # Extract Targets - try multiple possible column names
    raw_targets = _first_nonblank_cell(
        row, ['DrugTarget', 'Targets', 'Target', 'targets', 'ProteinTargets'])
    targets = _parse_target_list(raw_targets) if raw_targets is not None else []

    return smiles, targets
def search_drugs_by_target(target_name: str, drugs_df: pd.DataFrame,
                           exact_match: bool = False) -> pd.DataFrame:
    """
    Search for drugs that target a specific protein.

    Each DrugTarget cell may be a string representation of a list, a
    comma-separated string, or a single name. Comparison is
    case-insensitive and ignores surrounding whitespace.

    Args:
        target_name: Name of the protein target to search for
        drugs_df: DataFrame containing drug information (needs a
            DrugTarget column)
        exact_match: If True, only return exact matches; if False, use partial matching

    Returns:
        DataFrame containing all drugs that target the specified protein.
        An empty DataFrame is returned when drugs_df is None, and a
        zero-row slice of drugs_df when target_name is blank.
    """
    if drugs_df is None:
        return pd.DataFrame()

    # Normalise the query once instead of on every comparison
    needle = "" if target_name is None else str(target_name).strip().lower()
    if not needle:
        # A blank query would partially match every row
        return drugs_df.iloc[0:0]

    def candidate_targets(cell) -> list:
        text = str(cell).strip()
        if text.startswith('['):
            try:
                return list(ast.literal_eval(text))
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                # Not a valid list literal: fall back to the raw text
                return [text]
        if ',' in text:
            # Keep the whole text as a candidate too, so a target name that
            # itself contains a comma can still be matched
            return [text] + [part for part in text.split(',') if part.strip()]
        return [text]

    def has_target(cell) -> bool:
        if pd.isna(cell):
            return False
        # str() guards against non-string items from a parsed list (e.g. ints)
        normalised = (str(item).strip().lower() for item in candidate_targets(cell))
        if exact_match:
            return any(item == needle for item in normalised)
        return any(needle in item for item in normalised)

    # astype(bool) keeps the mask a valid boolean indexer even for a zero-row frame
    mask = drugs_df['DrugTarget'].apply(has_target).astype(bool)
    return drugs_df[mask]
def get_drug_info(drug_name: str, drugs_df: pd.DataFrame) -> dict:
    """
    Bundle the lookup result for a drug into a dictionary.

    Args:
        drug_name: Name of the drug
        drugs_df: DataFrame containing drug information

    Returns:
        Dictionary with the keys:
            - name: the drug name exactly as passed in
            - smiles: SMILES structure from find_drug_in_database (may be None)
            - targets: list of protein targets (empty list when none)
            - found: True only when a SMILES string was obtained
    """
    structure, target_list = find_drug_in_database(drug_name, drugs_df)

    info = {'name': drug_name, 'smiles': structure}
    # Normalise a missing/empty target result to an empty list
    info['targets'] = target_list or []
    info['found'] = structure is not None
    return info
def validate_smiles(smiles: str) -> bool:
    """
    Validate if a SMILES string is valid (requires RDKit).

    Args:
        smiles: SMILES string to validate

    Returns:
        True if RDKit parses the string into a molecule. False if the input
        is not a non-blank string, if RDKit is not installed, or if parsing
        fails.
    """
    # Reject blank/non-string input up front: RDKit appears to parse an empty
    # string into an empty molecule rather than None, which would otherwise
    # be reported as valid.
    if not isinstance(smiles, str) or not smiles.strip():
        return False

    try:
        from rdkit import Chem
    except ImportError:
        # Without RDKit nothing can be validated; report as invalid
        return False

    try:
        return Chem.MolFromSmiles(smiles) is not None
    except Exception:
        # Best-effort validator: any parser error means "not valid", while
        # KeyboardInterrupt/SystemExit are still allowed to propagate
        return False
# Demo / smoke test: only runs when this module is executed as a script
if __name__ == "__main__":
    try:
        # Read the example database from its relative path
        catalog = pd.read_csv("data1/drugsInfo.csv")
        print(f" Loaded {len(catalog)} drugs from database\n")

        divider = "=" * 60

        # Test 1: look up a single drug by name
        print(divider)
        print("TEST 1: Find Naltrexone")
        print(divider)
        structure, target_list = find_drug_in_database("Naltrexone", catalog)
        if not structure:
            print("❌ Not found")
        else:
            # Show at most three targets, with an ellipsis when more exist
            ellipsis = '...' if len(target_list) > 3 else ''
            print(" Found!")
            print(f" SMILES: {structure[:60]}...")
            print(f" Targets: {target_list[:3]}{ellipsis}")

        # Test 2: fetch the full info dictionary
        print("\n" + divider)
        print("TEST 2: Get complete drug info")
        print(divider)
        details = get_drug_info("Buprenorphine", catalog)
        print(f"Drug: {details['name']}")
        print(f"Found: {details['found']}")
        print(f"Targets: {len(details['targets'])} targets")

        # Test 3: partial-match search on the target column
        print("\n" + divider)
        print("TEST 3: Search drugs targeting opioid receptors")
        print(divider)
        hits = search_drugs_by_target("opioid", catalog, exact_match=False)
        print(f"Found {len(hits)} drugs")
        if len(hits) > 0:
            print("Examples:")
            for position, (_, record) in enumerate(hits.head(3).iterrows(), 1):
                print(f" {position}. {record['DrugName']}")
    except FileNotFoundError:
        print("❌ Error: data1/drugsInfo.csv not found")
        print(" Make sure the database file exists in the correct location")