|
|
|
|
| |
| |
| |
| import streamlit as st |
| import requests |
| from rdkit import Chem |
| from rdkit.Chem import Draw |
| import pandas as pd |
| import matplotlib.pyplot as plt |
| import seaborn as sns |
| from fpdf import FPDF |
| import tempfile |
| import logging |
| import os |
| import plotly.graph_objects as go |
| import networkx as nx |
| from typing import Optional, Dict, List, Any, Tuple |
| from openai import OpenAI |
|
|
| |
# Configure process-wide logging exactly once.
#
# ``logging.basicConfig`` is a no-op when the root logger already has handlers, but
# its arguments are still evaluated on every call. Building
# ``FileHandler(..., mode='w')`` unconditionally would therefore truncate
# ``pris_debug.log`` and leave an unclosed file handle behind each time this module
# is executed again in the same process (Streamlit re-runs the script on reruns).
if not logging.getLogger().handlers:
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler("pris_debug.log", mode='w'),
            logging.StreamHandler(),
        ],
    )

# Named logger shared by every component in this file.
logger = logging.getLogger("PRIS")
|
|
| |
| |
| |
# Base URLs shared by more than one entry in the registry below.
_OPENFDA_DRUG_BASE = "https://api.fda.gov/drug"
_PHARMGKB_DATA_BASE = "https://api.pharmgkb.org/v1/data"
_RXNAV_REST_BASE = "https://rxnav.nlm.nih.gov/REST"

# Registry of external service URLs, keyed by a short logical name.
# Entries containing "{}" are templates with one positional placeholder.
API_ENDPOINTS: Dict[str, str] = {
    # ClinicalTrials.gov and openFDA
    "clinical_trials": "https://clinicaltrials.gov/api/v2/studies",
    "fda_drug_approval": _OPENFDA_DRUG_BASE + "/label.json",
    "faers_adverse_events": _OPENFDA_DRUG_BASE + "/event.json",
    # NCBI: PubChem and PubMed
    "pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
    "pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
    # PharmGKB
    "pharmgkb_variant_clinical_annotations": _PHARMGKB_DATA_BASE + "/variant/{}/clinicalAnnotations",
    "pharmgkb_gene": _PHARMGKB_DATA_BASE + "/gene/{}",
    "pharmgkb_gene_variants": _PHARMGKB_DATA_BASE + "/gene/{}/variants",
    # BioPortal
    "bioportal_search": "https://data.bioontology.org/search",
    # RxNav (RxNorm / RxClass)
    "rxnorm_rxcui": _RXNAV_REST_BASE + "/rxcui.json",
    "rxnorm_properties": _RXNAV_REST_BASE + "/rxcui/{}/properties.json",
    "rxclass_by_drug": _RXNAV_REST_BASE + "/class/byDrugName.json",
}

# Headers merged into every outgoing request; per-call headers override these.
DEFAULT_HEADERS: Dict[str, str] = {
    "User-Agent": "PharmaResearchIntelligenceSuite/1.0 (Professional Use)",
    "Accept": "application/json",
}
|
|
| |
| |
| |
class APIConfigurationError(Exception):
    """Signal that a required API credential is absent or empty."""
|
|
# Load every credential the suite needs from Streamlit's secrets store. If any is
# undefined or blank, show the problem in the UI, log it, and halt the script.
try:
    OPENAI_API_KEY: str = st.secrets["OPENAI_API_KEY"]
    BIOPORTAL_API_KEY: str = st.secrets["BIOPORTAL_API_KEY"]
    PUB_EMAIL: str = st.secrets["PUB_EMAIL"]
    OPENFDA_KEY: str = st.secrets["OPENFDA_KEY"]

    # A secret can be defined but blank; name the offenders so the message is actionable.
    _blank_secrets = [
        _secret_name
        for _secret_name, _secret_value in (
            ("OPENAI_API_KEY", OPENAI_API_KEY),
            ("BIOPORTAL_API_KEY", BIOPORTAL_API_KEY),
            ("PUB_EMAIL", PUB_EMAIL),
            ("OPENFDA_KEY", OPENFDA_KEY),
        )
        if not _secret_value
    ]
    if _blank_secrets:
        raise APIConfigurationError(
            "One or more required API credentials are missing: " + ", ".join(_blank_secrets)
        )

except (KeyError, FileNotFoundError, APIConfigurationError) as e:
    # str(KeyError) can be nothing more than the quoted key name, so add context.
    # NOTE(review): FileNotFoundError is included on the understanding that
    # st.secrets raises it when no secrets file exists — confirm for the pinned
    # Streamlit version.
    _config_problem = f"missing secret: {e}" if isinstance(e, KeyError) else str(e)
    st.error(f"Critical configuration error: {_config_problem}")
    logger.critical(f"Configuration error: {_config_problem}")
    st.stop()
|
|
| |
| |
| |
class PharmaResearchEngine:
    """
    Core engine for integrating and analyzing pharmaceutical data.

    Provides a shared HTTP GET helper, PubChem compound-profile extraction, and
    holds the OpenAI client used by the AI strategy module.
    """

    def __init__(self) -> None:
        """Create the OpenAI client from the module-level ``OPENAI_API_KEY``."""
        self.openai_client = OpenAI(api_key=OPENAI_API_KEY)
        logger.info("PharmaResearchEngine initialized with OpenAI client.")

    @staticmethod
    def api_request(endpoint: str,
                    params: Optional[Dict[str, Any]] = None,
                    headers: Optional[Dict[str, str]] = None) -> Optional[Dict[str, Any]]:
        """
        Perform a resilient API GET request.

        Failures are logged and surfaced through ``st.error``; they are never raised
        to the caller.

        Args:
            endpoint (str): The URL endpoint for the API.
            params (Optional[Dict[str, Any]]): Query parameters to be included in the request.
            headers (Optional[Dict[str, str]]): Additional headers; these override
                ``DEFAULT_HEADERS`` on key collisions.

        Returns:
            Optional[Dict[str, Any]]: Decoded JSON response, or None on an HTTP error,
            a transport error, or a body that is not valid JSON.
        """
        # Keep credentials (e.g. the openFDA "api_key" parameter) out of the log.
        loggable_params = {
            key: ("***" if "key" in str(key).lower() else value)
            for key, value in (params or {}).items()
        }
        logger.debug(f"Requesting data from {endpoint} with params: {loggable_params}")

        try:
            response = requests.get(
                endpoint,
                params=params,
                headers={**DEFAULT_HEADERS, **(headers or {})},
                timeout=(3.05, 15),  # (connect, read) seconds
            )
            response.raise_for_status()
        except requests.exceptions.HTTPError as http_err:
            logger.error(f"HTTP Error {http_err.response.status_code} for {endpoint}: {http_err}")
            st.error(f"API HTTP Error: {http_err.response.status_code} - {http_err.response.reason}")
            return None
        except requests.exceptions.RequestException as e:
            logger.error(f"Network error during API request to {endpoint}: {str(e)}")
            st.error(f"Network error: {str(e)}")
            return None

        # Decode separately so a malformed body is not misreported as a network fault.
        try:
            payload = response.json()
        except ValueError as e:
            logger.error(f"Invalid JSON in response from {endpoint}: {str(e)}")
            st.error(f"API returned an invalid response: {str(e)}")
            return None

        logger.info(f"Successful API request to {endpoint}")
        return payload

    def get_compound_profile(self, identifier: str) -> Optional[Dict[str, str]]:
        """
        Retrieve a chemical profile for a given compound from PubChem.

        The identifier is substituted into the ``pubchem`` endpoint template, which
        is PubChem's by-name lookup URL.

        Args:
            identifier (str): The compound name (or other text accepted by the
                by-name lookup).

        Returns:
            Optional[Dict[str, str]]: A dictionary with keys ``molecular_formula``,
            ``iupac_name``, ``canonical_smiles``, ``molecular_weight`` and ``logp``
            (each value is "N/A" when the property is absent). Returns None if the
            identifier is blank or PubChem returns no compound.
        """
        from urllib.parse import quote  # function-scope import: only needed here

        cleaned = (identifier or "").strip()
        if not cleaned:
            logger.warning("Empty compound identifier supplied; skipping PubChem lookup.")
            return None

        # Percent-encode so characters such as "/", "#" or "?" cannot alter the
        # URL's path or be read as a query/fragment delimiter.
        formatted_endpoint = API_ENDPOINTS["pubchem"].format(quote(cleaned, safe=""))
        logger.info(f"Fetching compound profile from PubChem for identifier: {identifier}")
        pubchem_data = self.api_request(formatted_endpoint)

        if not pubchem_data or not pubchem_data.get("PC_Compounds"):
            logger.warning("No compound data found in PubChem response.")
            return None

        # Only the first returned compound record is profiled.
        compound = pubchem_data["PC_Compounds"][0]
        profile = {
            'molecular_formula': self._extract_property(compound, 'Molecular Formula'),
            'iupac_name': self._extract_property(compound, 'IUPAC Name'),
            'canonical_smiles': self._extract_property(compound, 'Canonical SMILES'),
            'molecular_weight': self._extract_property(compound, 'Molecular Weight'),
            'logp': self._extract_property(compound, 'LogP'),
        }
        logger.debug(f"Extracted compound profile: {profile}")
        return profile

    @staticmethod
    def _normalize_label(text: str) -> str:
        """Return *text* lower-cased with all whitespace removed, for tolerant matching."""
        return "".join(str(text).split()).casefold()

    def _extract_property(self, compound: Dict[str, Any], prop_name: str) -> str:
        """
        Helper function to extract a specific property from PubChem compound data.

        A property matches when ``prop_name`` equals, ignoring case and whitespace,
        either the property's ``urn.label`` or ``urn.name`` followed by
        ``urn.label`` (so "Canonical SMILES" matches label "SMILES" with name
        "Canonical", and "LogP" matches label "Log P").
        NOTE(review): that label/name layout is how PubChem records are understood
        to be shaped; confirm against a live response.

        Args:
            compound (Dict[str, Any]): The compound data dictionary from PubChem.
            prop_name (str): The name of the property to extract.

        Returns:
            str: The first matching property's string (``sval``), float (``fval``)
            or integer (``ival``) value rendered as a string, or "N/A" if not found.
        """
        wanted = self._normalize_label(prop_name)
        for prop in compound.get("props", []):
            urn = prop.get("urn", {})
            label = urn.get("label", "")
            qualifier = urn.get("name", "")

            accepted = {self._normalize_label(label)}
            if qualifier:
                accepted.add(self._normalize_label(f"{qualifier} {label}"))
            if wanted not in accepted:
                continue

            # Values are typed: text lives under "sval", numbers under "fval"/"ival".
            value = prop.get("value", {})
            for value_key in ("sval", "fval", "ival"):
                if value_key in value:
                    return str(value[value_key])

        logger.debug(f"Property '{prop_name}' not found for compound.")
        return "N/A"
|
|
| |
| |
| |
class ClinicalIntelligence:
    """
    Module for analyzing clinical trial landscapes and regulatory data.

    Wraps the ClinicalTrials.gov studies endpoint and the openFDA drug-label
    endpoint, using a ``PharmaResearchEngine`` for the HTTP calls.
    """

    def __init__(self) -> None:
        """Create the engine used for all outgoing API requests."""
        self.engine = PharmaResearchEngine()
        logger.info("ClinicalIntelligence module initialized.")

    @staticmethod
    def _build_trial_params(query: str) -> Dict[str, Any]:
        """Translate a user query into ClinicalTrials.gov v2 ``/studies`` parameters."""
        term = query.strip()
        # "NCT" followed only by digits is treated as a registry identifier,
        # regardless of letter case; anything else is a free-text search.
        if term[:3].upper() == "NCT" and term[3:].isdigit():
            return {"filter.ids": term.upper()}
        return {"query.term": term, "pageSize": 10}

    @staticmethod
    def _build_fda_search(drug_name: str) -> str:
        """Build the openFDA brand-name phrase query for *drug_name*."""
        # Double quotes delimit the phrase, so embedded ones are replaced by spaces
        # (and whitespace collapsed) to keep the query well-formed.
        phrase = " ".join(drug_name.replace('"', " ").split())
        return f'openfda.brand_name:"{phrase}"'

    def get_trial_landscape(self, query: str) -> List[Dict[str, Any]]:
        """
        Analyze the clinical trial landscape for a specified query.

        Args:
            query (str): A search term (condition, intervention, or NCT number) for clinical trials.

        Returns:
            List[Dict[str, Any]]: Up to five study records from the response's
            ``studies`` list; empty if the query is blank or the request fails.
        """
        if not query or not query.strip():
            logger.warning("Empty clinical trial query; no request sent.")
            return []

        params = self._build_trial_params(query)
        logger.info(f"Fetching clinical trials with query: {query}")
        trials = self.engine.api_request(API_ENDPOINTS["clinical_trials"], params=params)

        # api_request returns None on failure; only the first five studies are kept.
        trial_list = trials.get("studies", [])[:5] if trials else []
        logger.debug(f"Retrieved {len(trial_list)} clinical trials for query '{query}'")
        return trial_list

    def get_fda_approval(self, drug_name: str) -> Optional[Dict[str, Any]]:
        """
        Retrieve FDA approval information for a specified drug.

        Args:
            drug_name (str): The brand name of the drug to query.

        Returns:
            Optional[Dict[str, Any]]: The first record in the response's ``results``
            list, or None if the key is missing, the name is blank, the request
            fails, or nothing matches.
        """
        if not OPENFDA_KEY:
            st.error("OpenFDA API key not configured.")
            logger.error("Missing OpenFDA API key.")
            return None

        if not drug_name or not drug_name.strip():
            logger.warning("Empty drug name; no FDA request sent.")
            return None

        params: Dict[str, Any] = {
            "api_key": OPENFDA_KEY,
            "search": self._build_fda_search(drug_name),
            "limit": 1,
        }
        logger.info(f"Fetching FDA approval data for drug: {drug_name}")
        data = self.engine.api_request(API_ENDPOINTS["fda_drug_approval"], params=params)

        if data and data.get("results"):
            logger.debug(f"FDA approval data retrieved for drug: {drug_name}")
            return data["results"][0]
        logger.warning(f"No FDA approval data found for drug: {drug_name}")
        return None
|
|
class AIDrugInnovator:
    """
    AI-Driven Drug Development Strategist powered by GPT-4.

    Builds a fixed-structure prompt and sends it to the OpenAI chat completions
    API through the engine's client.
    """

    def __init__(self) -> None:
        """Create the engine whose OpenAI client is used for generation."""
        self.engine = PharmaResearchEngine()
        logger.info("AIDrugInnovator module initialized with GPT-4 integration.")

    @staticmethod
    def _build_prompt(target: str, strategy: str) -> str:
        """Assemble the strategy-request prompt for *target* under *strategy*."""
        return (
            f"As Chief Scientific Officer at a leading pharmaceutical company, "
            f"develop a {strategy} development strategy for the target: {target}.\n\n"
            "Include the following sections:\n"
            "- **Target Validation Approach:** Describe methods to confirm the target's role in the disease.\n"
            "- **Lead Optimization Tactics:** Outline strategies for refining lead compounds.\n"
            "- **Clinical Trial Design:** Propose innovative trial designs and endpoints.\n"
            "- **Regulatory Pathway Analysis:** Evaluate the regulatory strategy and compliance roadmap.\n"
            "- **Commercial Potential Assessment:** Analyze market opportunity and competitive landscape.\n\n"
            "Please format your response in Markdown with clear, well-defined sections."
        )

    def generate_strategy(self, target: str, strategy: str) -> str:
        """
        Generate an AI-driven development strategy.

        Args:
            target (str): The target disease, pathway, or biological entity.
            strategy (str): The desired development paradigm (e.g., "First-in-class").

        Returns:
            str: The model's Markdown response. A fixed explanatory message is
            returned instead when the target is blank, the API call raises, or the
            model returns no text; this method never raises.
        """
        failure_message = "Strategy generation failed. Please check API configuration and try again."

        # Avoid a billable API call for a prompt that has no subject.
        if not target or not target.strip():
            logger.warning("Strategy generation requested without a target.")
            return "Please provide a target before generating a strategy."

        prompt: str = self._build_prompt(target.strip(), strategy)

        logger.info(f"Generating AI strategy for target: {target} using paradigm: {strategy}")
        try:
            response = self.engine.openai_client.chat.completions.create(
                model="gpt-4",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.7,
                max_tokens=1500,
            )
            generated_strategy = response.choices[0].message.content
        except Exception as e:
            # Boundary with a remote service: log with traceback, degrade gracefully.
            logger.error(f"Error during AI strategy generation: {str(e)}", exc_info=True)
            return failure_message

        # The message content may be absent; keep the declared ``str`` return type.
        if not generated_strategy:
            logger.error("AI strategy generation returned no content.")
            return failure_message

        logger.debug("AI strategy generation successful.")
        return generated_strategy
|
|
| |
| |
| |
class PharmaResearchInterface:
    """
    User Interface for the Pharma Research Intelligence Suite.

    Configures the Streamlit page and renders five tabs: drug innovation, clinical
    trial analytics, compound profiling, regulatory lookup, and AI strategy generation.
    """

    def __init__(self) -> None:
        """Configure the page, then build the clinical and AI back-end modules."""
        # Page configuration goes first so no other Streamlit call can precede it.
        self._configure_page()
        self.clinical_intel = ClinicalIntelligence()
        self.ai_innovator = AIDrugInnovator()
        logger.info("PharmaResearchInterface initialized and page configured.")

    def _configure_page(self) -> None:
        """
        Configure the Streamlit page settings and apply custom CSS styles.
        """
        st.set_page_config(
            page_title="PRIS - Pharma Research Intelligence Suite",
            layout="wide",
            initial_sidebar_state="expanded"
        )
        # Static, developer-authored CSS: raw HTML is required to inject <style>.
        st.markdown("""
<style>
.main {background-color: #f9f9f9; padding: 20px;}
.stAlert {padding: 20px; border: 1px solid #e0e0e0; border-radius: 5px; background-color: #fff;}
.reportview-container .markdown-text-container {font-family: 'Arial', sans-serif; line-height: 1.6;}
</style>
""", unsafe_allow_html=True)
        logger.info("Streamlit page configuration completed.")

    def render(self) -> None:
        """
        Render the complete Streamlit user interface with multiple functional tabs.
        """
        st.title("Pharma Research Intelligence Suite")
        self._render_navigation()
        logger.info("User interface rendered successfully.")

    def _render_navigation(self) -> None:
        """
        Create the tab bar and render each module inside its tab.
        """
        tabs = st.tabs([
            "🚀 Drug Innovation",
            "📈 Trial Analytics",
            "🧪 Compound Profiler",
            "📜 Regulatory Hub",
            "🤖 AI Strategist"
        ])
        renderers = (
            self._drug_innovation,
            self._trial_analytics,
            self._compound_profiler,
            self._regulatory_hub,
            self._ai_strategist,
        )
        # Tabs and renderers are listed in the same order.
        for tab, render_tab in zip(tabs, renderers):
            with tab:
                render_tab()

    def _drug_innovation(self) -> None:
        """
        Render the drug innovation module that generates AI-powered development strategies.

        Inputs sit in the narrow left column; the generated blueprint is shown in
        the wide right column.
        """
        st.header("AI-Powered Drug Innovation Engine")
        col1, col2 = st.columns([1, 3])

        with col1:
            target = st.text_input("Target Pathobiology:", placeholder="e.g., EGFR mutant NSCLC")
            strategy = st.selectbox("Development Paradigm:",
                                    ["First-in-class", "Fast-follower", "Biologic", "ADC", "Gene Therapy"])
            requested = st.button("Generate Development Blueprint")

        if not requested:
            return
        if not target.strip():
            st.warning("Please enter a target before generating a blueprint.")
            return

        with col2:
            with st.spinner("Formulating strategic plan..."):
                blueprint = self.ai_innovator.generate_strategy(target, strategy)
            # Model output is untrusted text: render as Markdown only, no raw HTML.
            st.markdown(blueprint)
        logger.info("Drug innovation strategy generated and displayed.")

    @staticmethod
    def _summarize_trial(study: Dict[str, Any]) -> Dict[str, Any]:
        """Flatten one study record into the Title/Status/Phase/Enrollment table row."""
        protocol = study.get("protocolSection", {})
        design = protocol.get("designModule", {})
        # "phases" may be missing or an empty list; multi-phase trials are joined.
        phases = design.get("phases") or ["N/A"]
        return {
            "Title": protocol.get("identificationModule", {}).get("briefTitle", "N/A"),
            "Status": protocol.get("statusModule", {}).get("overallStatus", "N/A"),
            "Phase": "/".join(str(phase) for phase in phases),
            "Enrollment": design.get("enrollmentInfo", {}).get("count", "N/A"),
        }

    def _trial_analytics(self) -> None:
        """
        Render the clinical trial analytics module to explore current trial landscapes.

        Shows a table of the returned trials and a bar chart of their phase counts.
        """
        st.header("Clinical Trial Landscape Analysis")
        trial_query = st.text_input("Search Clinical Trials:", placeholder="Enter condition, intervention, or NCT number")

        if not st.button("Analyze Trial Landscape"):
            return
        if not trial_query.strip():
            st.warning("Please enter a search term.")
            return

        with st.spinner("Fetching trial data..."):
            trials = self.clinical_intel.get_trial_landscape(trial_query)

        if not trials:
            st.warning("No clinical trials found for the query.")
            logger.warning("No clinical trial data returned from API.")
            return

        st.subheader("Top 5 Clinical Trials")
        df = pd.DataFrame([self._summarize_trial(study) for study in trials])
        st.dataframe(df)

        st.subheader("Trial Phase Distribution")
        phase_counts = df["Phase"].value_counts()
        fig, ax = plt.subplots()
        try:
            sns.barplot(x=phase_counts.index, y=phase_counts.values, ax=ax)
            ax.set_xlabel("Trial Phase")
            ax.set_ylabel("Number of Trials")
            st.pyplot(fig)
        finally:
            # pyplot keeps figures alive until closed; release it after rendering.
            plt.close(fig)
        logger.info("Clinical trial analytics displayed successfully.")

    def _compound_profiler(self) -> None:
        """
        Render the compound profiler module: 2D structure plus key properties.
        """
        st.header("Multi-Omics Compound Profiler")
        compound = st.text_input("Analyze Compound:", placeholder="Enter drug name or SMILES")

        if not compound or not compound.strip():
            return

        with st.spinner("Decoding molecular profile..."):
            # Reuse the existing engine instead of building a new one per rerun.
            profile = self.clinical_intel.engine.get_compound_profile(compound)

        if not profile:
            st.warning("No compound data available. Please verify the input.")
            logger.warning("Compound profiler did not return any data.")
            return

        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Structural Insights")
            smiles = profile['canonical_smiles']
            # "N/A" is the profile's placeholder for a missing value, not a SMILES.
            mol = Chem.MolFromSmiles(smiles) if smiles and smiles != "N/A" else None
            if mol:
                img = Draw.MolToImage(mol, size=(400, 300))
                st.image(img, caption="2D Molecular Structure")
            else:
                st.warning("Unable to render molecular structure from SMILES.")
                logger.warning("RDKit failed to create molecule from SMILES.")

        with col2:
            st.subheader("Physicochemical Profile")
            st.metric("Molecular Weight", profile['molecular_weight'])
            st.metric("LogP", profile['logp'])
            st.metric("IUPAC Name", profile['iupac_name'])
            st.code(f"SMILES: {profile['canonical_smiles']}")
        logger.info("Compound profile details rendered.")

    def _regulatory_hub(self) -> None:
        """
        Render the regulatory intelligence hub module for looking up FDA label data.
        """
        st.header("Regulatory Intelligence Hub")
        st.write("Access detailed insights into FDA approvals and regulatory pathways.")
        drug_name = st.text_input("Enter Drug Name for Regulatory Analysis:", placeholder="e.g., aspirin")

        if not st.button("Fetch Regulatory Data"):
            return
        if not drug_name.strip():
            st.warning("Please enter a drug name.")
            return

        with st.spinner("Retrieving regulatory information..."):
            fda_data = self.clinical_intel.get_fda_approval(drug_name)

        if fda_data:
            st.subheader("FDA Approval Details")
            st.json(fda_data)
            logger.info("FDA regulatory data displayed.")
        else:
            st.warning("No FDA data found for the specified drug.")
            logger.warning("FDA regulatory data retrieval returned no results.")

    def _ai_strategist(self) -> None:
        """
        Render the AI strategist module; it always requests a "First-in-class" strategy.
        """
        st.header("AI Drug Development Strategist")
        st.write("Utilize GPT-4 to craft cutting-edge drug development strategies.")
        target = st.text_input("Enter Target Disease or Pathway:", placeholder="e.g., KRAS G12C mutation")

        if not st.button("Generate AI Strategy"):
            return
        if not target.strip():
            st.warning("Please enter a target disease or pathway.")
            return

        with st.spinner("Generating AI-driven strategy..."):
            strategy = self.ai_innovator.generate_strategy(target, "First-in-class")
        # Model output is untrusted text: render as Markdown only, no raw HTML.
        st.markdown(strategy)
        logger.info("AI-driven strategy generated and displayed.")
|
|
| |
| |
| |
# Script entry point: build the interface and render it.
if __name__ == "__main__":
    try:
        interface = PharmaResearchInterface()
        interface.render()
        logger.info("PRIS application launched successfully.")
    except Exception as e:
        # Last-resort boundary: keep the traceback in the log so the failure can be
        # diagnosed, and show a short message in the UI.
        logger.critical(f"Unexpected error during application launch: {str(e)}", exc_info=True)
        st.error(f"Application failed to start due to an unexpected error: {str(e)}")
|
|