"""Gradio chat UI that answers questions with a retrieval-augmented QA chain.

At import time this module loads ``data.json`` and ``./paper.pdf``, indexes
them in a FAISS vector store using OpenAI embeddings, and builds a LangChain
``RetrievalQA`` chain around ``gpt-4o``. Running the file as a script launches
the Gradio chat interface.
"""

import json
import os
import random
import re

import gradio as gr
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import JSONLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from openai import OpenAI

# SECURITY: the API key must be supplied through the environment and never be
# written into source control.
# NOTE(review): a key was previously hard-coded at this spot; treat it as
# compromised and revoke/rotate it.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before "
        "starting the app."
    )

client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
)

# Load the JSON file: one document per element of the top-level list.
json_loader = JSONLoader(
    file_path="data.json",
    jq_schema=".[]",
    # The extracted elements are not plain strings, so disable the loader's
    # "content must be a string" check.
    text_content=False,
)
json_doc = json_loader.load()

# Load the PDF and split it into overlapping chunks for retrieval.
pdf_loader = PyPDFLoader("./paper.pdf")
pdf_pages = pdf_loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
pdf_doc = text_splitter.split_documents(pdf_pages)

documents = json_doc + pdf_doc

# Embed every document with OpenAI embeddings and index them in FAISS.
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(documents, embeddings)

llm = ChatOpenAI(model="gpt-4o")
retriever = vector_store.as_retriever()

# "stuff" chain type: the retrieved documents are placed into a single prompt.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)

# Example prompts offered in the chat UI (insertion order is display order).
question = {
    'Q1': 'How do I compare gene expression across different models?',
    'Q2': 'How do I filter datasets by genotype?',
    'Q3': 'Can I analyze PCA plots to explore differences between NSCLC and SCLC?',
    'Q4': "How can I compare primary and metastatic samples?",
    'Q5': "Where can I download reprocessed gene expression data?",
    'Other': "Other questions.",
}

# Custom CSS passed to the interface.
# NOTE(review): the hashed ``svelte-*`` class names presumably depend on the
# installed Gradio build -- verify these selectors after upgrading Gradio.
_CUSTOM_CSS = (
    ".svelte-i3tvor {visibility: hidden} "
    "footer {visibility: hidden} "
    ".placeholder-content.svelte-lykdgn.svelte-lykdgn "
    "{justify-content: flex-end; flex-direction: row} "
    ".examples.svelte-lykdgn.svelte-lykdgn { display: block; margin: 0 } "
    ".example.svelte-lykdgn.svelte-lykdgn{max-width: 300px; margin-bottom: 15px}"
)


def openai(message: str, history: list) -> str:
    """Answer one chat message using the retrieval QA chain.

    Every message, including the example prompts, is answered by the chain;
    there are no canned replies.

    Args:
        message: The user's latest message.
        history: Prior conversation passed in by Gradio. Unused: each
            question is answered independently of earlier turns.

    Returns:
        The ``"result"`` entry of the chain output, or a short prompt to
        enter a question when ``message`` is blank.
    """
    # Avoid an embedding + LLM round trip for an empty submission.
    if isinstance(message, str) and not message.strip():
        return "Please enter a question."

    answer = qa_chain.invoke({"query": message})
    return answer["result"]


demo = gr.ChatInterface(
    fn=openai,
    type="messages",
    chatbot=gr.Chatbot(),
    title="test bot",
    theme="ocean",
    examples=list(question.values()),
    css=_CUSTOM_CSS,
)

if __name__ == "__main__":
    demo.launch()