# chatbot / app.py
# gzsbwfj03's picture
# Update space
# 1d872cd
import gradio as gr
import random
import os
import re
import json
from openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.document_loaders import JSONLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
# SECURITY: the OpenAI API key must never be hard-coded in source. It is read
# from the OPENAI_API_KEY environment variable (for a hosted deployment,
# configure it as a secret in the hosting platform's settings). Any key that
# was previously committed in this file must be treated as leaked and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    # Fail at startup with an actionable message rather than deep inside a
    # client or embedding constructor.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it (or configure it as a secret) "
        "before starting the app."
    )

client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],  # This is the default and can be omitted
)
# --- Knowledge base: JSON entries ------------------------------------------
# The jq expression ".[]" yields one document per element of the top-level
# array in data.json.
json_loader = JSONLoader(
    file_path="data.json",
    jq_schema=".[]",
    # Extracted elements are not required to be plain strings
    # (see the JSONLoader documentation for `text_content`).
    text_content=False,
)
json_doc = json_loader.load()

# --- Knowledge base: PDF chunks --------------------------------------------
# Load paper.pdf and split it with chunk_size=1000 / chunk_overlap=50 so that
# each piece is small enough to embed and retrieve on its own.
pdf_loader = PyPDFLoader("./paper.pdf")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
pdf_doc = text_splitter.split_documents(pdf_loader.load())

# Combined corpus: JSON entries first, then the PDF chunks.
documents = json_doc + pdf_doc

# --- Vector index ----------------------------------------------------------
# Embed every document with OpenAI embeddings and index them in a FAISS store.
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(documents, embeddings)

# --- Question answering ----------------------------------------------------
# The retriever looks up relevant documents in the FAISS index; the chain
# passes them to gpt-4o. With return_source_documents=True the chain output
# carries the retrieved documents alongside the "result" text.
retriever = vector_store.as_retriever()
llm = ChatOpenAI(model="gpt-4o")
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)
# Example prompts offered in the chat UI, keyed by a short identifier.
# Insertion order (Q1..Q5, then Other) is the order they are listed in.
question = dict(
    Q1="How do I compare gene expression across different models?",
    Q2="How do I filter datasets by genotype?",
    Q3="Can I analyze PCA plots to explore differences between NSCLC and SCLC?",
    Q4="How can I compare primary and metastatic samples?",
    Q5="Where can I download reprocessed gene expression data?",
    Other="Other questions.",
)
def openai(message, history):
    """Answer one chat message using the retrieval QA chain.

    This is the callback handed to the chat interface, so its name and
    ``(message, history)`` signature are kept as-is.

    Args:
        message: The user's latest message; it is sent to the chain as the
            ``"query"`` input.
        history: Earlier turns supplied by the chat UI. Unused: every
            message is answered independently of previous turns.

    Returns:
        The ``"result"`` field of the chain output, or a short prompt asking
        for a question when ``message`` is blank.
    """
    # A blank message carries no query; answer locally instead of spending a
    # retrieval lookup and an LLM call on it.
    if isinstance(message, str) and not message.strip():
        return "Please enter a question."

    # Every message, including the predefined example prompts, goes through
    # the retrieval chain; no canned answers are used.
    answer = qa_chain.invoke({"query": message})
    return answer["result"]
# Chat UI wiring. `openai` produces the replies and the predefined prompts
# are offered as clickable examples.
# Optional arguments currently left disabled: `description`, `cache_examples`.
demo = gr.ChatInterface(
    fn=openai,
    type="messages",
    chatbot=gr.Chatbot(),
    title="test bot",
    theme="ocean",
    examples=[question[key] for key in ("Q1", "Q2", "Q3", "Q4", "Q5", "Other")],
    # CSS overrides: hide the footer and the `.svelte-i3tvor` element, and
    # adjust the layout of the placeholder / example tiles.
    # NOTE(review): the `svelte-*` class names look build-generated -- confirm
    # they still match after a Gradio upgrade.
    css=(
        ".svelte-i3tvor {visibility: hidden} "
        "footer {visibility: hidden} "
        ".placeholder-content.svelte-lykdgn.svelte-lykdgn "
        "{justify-content: flex-end; flex-direction: row} "
        ".examples.svelte-lykdgn.svelte-lykdgn { display: block; margin: 0 } "
        ".example.svelte-lykdgn.svelte-lykdgn{max-width: 300px; margin-bottom: 15px}"
    ),
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()