sunny333 commited on
Commit
568cd7b
·
1 Parent(s): 731be0b

initial commit

Browse files
RAG_MLM/__init__.py ADDED
File without changes
RAG_MLM/app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import Image
3
+ import random
4
+ import embedder as eb
5
+ import utility as ut
6
+ import ragMLM as rag
7
+ from PIL import Image
8
+ import base64
9
+ from io import BytesIO
10
#----image utility-----
def plt_img_base64(img_base64):
    """Turn a base64-encoded image string into a PIL Image object."""
    raw_bytes = base64.b64decode(img_base64)   # base64 text -> raw bytes
    buffer = BytesIO(raw_bytes)                # bytes -> file-like object
    return Image.open(buffer)                  # file-like -> PIL image
20
+
21
# Dummy text generation function
def generate_text(input_text):
    """Placeholder generator: echo the input prefixed with 'Echo: '."""
    return "Echo: {}".format(input_text)
24
+
25
# Dummy multiple images generation
def generate_images(n,imgList):
    """Return n random 200x200 placeholder images followed by the decoded
    base64 images from imgList (PIL Image objects)."""
    images = []
    # n solid-colour placeholders with random RGB values (demo filler).
    for _ in range(n):
        img = Image.new('RGB', (200, 200), color=(random.randint(0,255), random.randint(0,255), random.randint(0,255)))
        images.append(img)
    # Decode each base64 string retrieved as context into a PIL image.
    for item in imgList:
        img = plt_img_base64(item)
        images.append(img)
    return images
35
+
36
+
37
# The function Gradio will call
def process_input(user_input):
    """Run the multimodal RAG chain for user_input.

    Returns (answer text, cleaned source text, list of PIL images).
    """
    #------calling llm------
    #docs = eb.retriever_multi_vector.invoke(user_input, limit=5)
    #r = ut.split_image_text_types(docs)
    response = rag.multimodal_rag_w_sources.invoke({'input': user_input})
    # Retrieved text chunks used as context, cleaned up for display.
    text_sources = response['context']['texts']
    text_sources = ut.beautify_output(text_sources)
    text_answer = response['answer']
    #text_answer = ut.beautify_output(text_answer)
    # Base64-encoded images retrieved as context.
    img_sources = response['context']['images']
    #---------end-----------

    #text_response = generate_text(user_input)
    # One random placeholder image is prepended before the retrieved images.
    image_responses = generate_images(1,img_sources)
    return text_answer,text_sources, image_responses
53
# Define Gradio interface
# Single-page UI: one query box in; answer text, context and image gallery out.
iface = gr.Interface(
    fn=process_input,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
    outputs=[
        gr.Textbox(label="Response Text"),
        gr.Textbox(label="Context"),
        gr.Gallery(label="Response Images", columns=[3], height="auto")
    ],
    title="Text to Text + Multiple Images Demo",
    description="Enter a query and get text plus multiple images!"
)

iface.launch()
RAG_MLM/differentiator.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pickle
import htmltabletomd
import os

# Raw Unstructured elements produced by extractor.py (data.pkl).
# NOTE: pickle.load is only acceptable because this artifact is generated
# locally by our own pipeline; never unpickle untrusted files.
data = ""
with open('data.pkl', 'rb') as f:
    data = pickle.load(f)
7
+
8
+
9
def differentiate_table_text():
    """Split extracted elements into text chunks and tables, convert table
    HTML to Markdown, and pickle both lists for the summarisation stage.

    Reads the module-level `data` list; writes RAG_MLM/docs.pkl and
    RAG_MLM/table.pkl.
    """
    docs = []
    tables = []
    for doc in data:
        # Unstructured tags each element with a category string.
        if doc.metadata['category'] == 'Table':
            tables.append(doc)
        elif doc.metadata['category'] == 'CompositeElement':
            docs.append(doc)
    # Replace each table's raw content with a Markdown rendering of its HTML;
    # Markdown embeds better for retrieval than raw HTML.
    for table in tables:
        table.page_content = htmltabletomd.convert_table(table.metadata['text_as_html'])
    print(f"length of docs {len(docs)}, length of tables {len(tables)}")

    with open('RAG_MLM/docs.pkl', 'wb') as f:
        pickle.dump(docs, f)

    with open('RAG_MLM/table.pkl', 'wb') as f:
        pickle.dump(tables, f)
26
+
27
# call this for differentiator
# Skip the split when the output artifact already exists (idempotent re-runs).
file_path="RAG_MLM/docs.pkl"
if os.path.exists(file_path):
    print(f"✅ File '{file_path}' found")
else:
    print(">>>>>>> generating: differentiating tables and text")
    differentiate_table_text()
34
+
RAG_MLM/embedder.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import uuid
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain_community.storage import RedisStore
from langchain_community.utilities.redis import get_client
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
import pickle
import redis
import os

from dotenv import load_dotenv

# Load environment variables from a local .env file (OPENAI_API_KEY, etc.).
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# SECURITY fix: never print the raw key (it ends up in logs);
# only report whether it is configured.
print("OPENAI_API_KEY configured:", bool(OPENAI_API_KEY))
openai_embed_model = OpenAIEmbeddings(model='text-embedding-3-small')
20
+
21
#-----remote redis------
# SECURITY fix: the Upstash host and password were hard-coded here and are
# now part of the repository history -- rotate those credentials.
# The connection string must come from the environment instead, e.g.
#   REDIS_URL="rediss://:<password>@<host>:6379"
# (Also removed a dead `redis.Redis(...)` client that was created and then
# immediately overwritten by `redis.from_url`.)
redis_url = os.getenv("REDIS_URL")
if not redis_url:
    raise RuntimeError("REDIS_URL environment variable is not set")
r = redis.from_url(redis_url)
redis_store = RedisStore(client=r)
#------

#-----local redis------
#client = get_client('redis://localhost:6379')
#redis_store = RedisStore(client=client)
#-----------------------------------
37
+
38
+
39
#-------pickle loading-----------
# NOTE: pickle is only safe on these locally generated artifacts.
def _load_pickle(path):
    """Load one pickled artifact produced by the earlier pipeline stages."""
    with open(path, 'rb') as fh:
        return pickle.load(fh)

text_summaries = _load_pickle('RAG_MLM/text_summaries.pkl')
text_docs = _load_pickle('RAG_MLM/docs.pkl')

table_summaries = _load_pickle('RAG_MLM/table_summaries.pkl')
table_docs = _load_pickle('RAG_MLM/table.pkl')

image_summaries = _load_pickle('RAG_MLM/image_summaries.pkl')
imgs_base64 = _load_pickle('RAG_MLM/img_base64_list.pkl')
#--------------------------
61
+
62
def create_multi_vector_retriever(
    docstore, vectorstore, text_summaries, texts, table_summaries, tables,
    image_summaries, images
):
    """
    Create retriever that indexes summaries, but returns raw images or texts.

    The vectorstore indexes the short summaries (one Document per summary,
    linked by a uuid under `doc_id`); the docstore holds the raw content
    keyed by the same uuid, so retrieval returns the original element.
    Any of the three (texts/tables/images) groups may be empty.
    """
    id_key = "doc_id"

    # Create the multi-vector retriever
    retriever = MultiVectorRetriever(
        vectorstore=vectorstore,
        docstore=docstore,
        id_key=id_key,
    )

    # Helper function to add documents to the vectorstore and docstore
    def add_documents(retriever, doc_summaries, doc_contents):
        # One shared uuid per (summary, raw content) pair.
        doc_ids = [str(uuid.uuid4()) for _ in doc_contents]
        summary_docs = [
            Document(page_content=s, metadata={id_key: doc_ids[i]})
            for i, s in enumerate(doc_summaries)
        ]
        retriever.vectorstore.add_documents(summary_docs)
        # Store plain strings in the docstore (unwrap Document objects).
        raw_contents = [doc.page_content if isinstance(doc, Document) else doc for doc in doc_contents]

        retriever.docstore.mset(list(zip(doc_ids, raw_contents)))

    # Add texts, tables, and images
    # Check that text_summaries is not empty before adding
    if text_summaries:
        add_documents(retriever, text_summaries, texts)

    # Check that table_summaries is not empty before adding
    if table_summaries:
        add_documents(retriever, table_summaries, tables)

    # Check that image_summaries is not empty before adding
    if image_summaries:
        add_documents(retriever, image_summaries, images)
    return retriever
103
+
104
# Vector store for the summary embeddings, using cosine distance.
# NOTE(review): no persist_directory is configured, so this collection is
# presumably ephemeral and re-embedded on every process start -- confirm.
chroma_db = Chroma(
    collection_name="mm_rag",
    embedding_function=openai_embed_model,
    collection_metadata={"hnsw:space": "cosine"},
)


# Create retriever
# Builds the multi-vector retriever at import time from the pickled
# summaries/raw elements loaded above; Redis holds the raw content.
retriever_multi_vector = create_multi_vector_retriever(
    redis_store, chroma_db,
    text_summaries, text_docs,
    table_summaries, table_docs,
    image_summaries, imgs_base64,
)
120
+
121
#------utility------

from PIL import Image
import base64
from io import BytesIO

def plt_img_base64(img_base64):
    """Decode a base64-encoded string and return it as a PIL Image.

    Bug fix: the original decoded the image but never returned (or displayed)
    it, so every caller received None.
    """
    # Decode the base64 string into raw bytes.
    img_data = base64.b64decode(img_base64)
    # Wrap the bytes in an in-memory buffer PIL can read from.
    img_buffer = BytesIO(img_data)
    # Open the image using PIL.
    img = Image.open(img_buffer)
    return img

# Check retrieval-----uncomment to check diretly ----
#query = "tell me about free body diagram"
#docs = retriever_multi_vector.invoke(query, limit=5)
# We get 3 relevant docs
#print(">>>>>>>>",len(docs))
#print(docs[0])
RAG_MLM/extractor.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_community.document_loaders import UnstructuredPDFLoader
import os
import pickle

# Source PDF for the whole pipeline.
doc = 'data/filteredData.pdf'
6
+
7
def extractor_text_image_table():
    """Parse the source PDF with Unstructured (hi_res strategy), extracting
    text chunks, embedded images and tables, and pickle the resulting
    elements to data.pkl for the differentiator stage."""
    loader = UnstructuredPDFLoader(file_path=doc,
                                   strategy='hi_res',
                                   extract_images_in_pdf=True,
                                   infer_table_structure=True,
                                   # section-based chunking
                                   chunking_strategy="by_title",
                                   max_characters=4000,  # max size of chunks
                                   new_after_n_chars=4000,  # preferred size of chunks
                                   # smaller chunks < 2000 chars will be combined into a larger chunk
                                   combine_text_under_n_chars=2000,
                                   mode='elements',
                                   image_output_dir_path='./figures')
    data = loader.load()
    print_retrived_data(data)
    with open('data.pkl', 'wb') as f:
        pickle.dump(data, f)
24
+
25
+
26
def print_retrived_data(data):
    """Log the category of every extracted element (debug aid).

    NOTE(review): the name keeps the original 'retrived' spelling because
    other modules call it by this name.
    """
    categories = [element.metadata['category'] for element in data]
    print(">>>>>>>>>>>>>>data retrived>>>>>>>>")
    print(categories)
    print(">>>>>>>>>>>>>>end -- data retrived>>>>>>>>")
30
+
31
+
32
# call this to extract images
# Extraction with hi_res OCR is slow, so it is skipped when data.pkl exists.
file_path="data.pkl"
if os.path.exists(file_path):
    print(f"✅ File '{file_path}' found")
else:
    print(">>>>>>>> generating: extracting text images tables >>>>>")
    extractor_text_image_table()
RAG_MLM/main.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ extractor
2
+ |
3
+ differentiator
4
+ |
5
+ summary
6
+ |
7
+ embedder
8
+ |
9
+ ragMLM
10
+ |
11
+ app
RAG_MLM/ragMLM.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from operator import itemgetter
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.messages import HumanMessage
from . import utility as ut
from . import embedder as ed
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# SECURITY fix: confirm presence only; never print the secret itself.
print("OPENAI_API_KEY configured:", bool(OPENAI_API_KEY))

# Deterministic (temperature=0) GPT-4o chat model used by the RAG chain.
chatgpt = ChatOpenAI(model_name='gpt-4o', temperature=0)
18
def multimodal_prompt_function(data_dict):
    """
    Create a multimodal prompt with both text and image context.
    This function formats the provided context from `data_dict`, which contains
    text, tables, and base64-encoded images. It joins the text (with table) portions
    and prepares the image(s) in a base64-encoded format to be included in a
    message.
    The formatted text and images (context) along with the user question are used to
    construct a prompt for GPT-4o.

    Expects data_dict to have keys 'context' (with 'texts' and 'images' lists)
    and 'question'. Returns a single-element list with one HumanMessage whose
    content mixes image_url and text parts (OpenAI multimodal format).
    """
    formatted_texts = "\n".join(data_dict["context"]["texts"])
    messages = []

    # Adding image(s) to the messages if present
    if data_dict["context"]["images"]:
        for image in data_dict["context"]["images"]:
            # NOTE(review): assumes every image is JPEG-encoded -- confirm
            # against what the extractor writes to ./figures.
            image_message = {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{image}"},
            }
            messages.append(image_message)

    # Adding the text for analysis
    text_message = {
        "type": "text",
        "text": (
            f"""You are an analyst tasked with understanding detailed information
            and trends from text documents,
            data tables, and charts and graphs in images.
            You will be given context information below which will be a mix of
            text, tables, and images usually of charts or graphs.
            Use this information to provide answers related to the user
            question.
            Do not make up answers, use the provided context documents below and
            answer the question to the best of your ability.

            User question:
            {data_dict['question']}

            Context documents:
            {formatted_texts}

            Answer:
            """
        ),
    }
    messages.append(text_message)
    return [HumanMessage(content=messages)]
66
+
67
# Create RAG chain
# context/question dict -> multimodal prompt -> GPT-4o -> plain answer string.
multimodal_rag = (
    {
        "context": itemgetter('context'),
        "question": itemgetter('input'),
    }
    |
    RunnableLambda(multimodal_prompt_function)
    |
    chatgpt
    |
    StrOutputParser()
)

# Pass input query to retriever and get context document elements,
# then split the retrieved docs into base64 images vs texts/tables.
retrieve_docs = (itemgetter('input')
                 |
                 ed.retriever_multi_vector
                 |
                 RunnableLambda(ut.split_image_text_types))

# Below, we chain `.assign` calls. This takes a dict and successively
# adds keys-- "context" and "answer"-- where the value for each key
# is determined by a Runnable (function or chain executing at runtime).
# This helps in having the retrieved context along with the answer generated by GPT-4o
multimodal_rag_w_sources = (RunnablePassthrough.assign(context=retrieve_docs)
                            .assign(answer=multimodal_rag)
                            )


#------ direct testing-------
#response = multimodal_rag_w_sources.invoke({'input': query})
RAG_MLM/summary.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough
import base64
import os
from langchain_core.messages import HumanMessage
import pickle
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# SECURITY fix: confirm presence only; never print the secret itself.
# (Also removed duplicate `import os` / `from langchain_openai import ChatOpenAI`.)
print("OPENAI_API_KEY configured:", bool(OPENAI_API_KEY))

# Deterministic GPT-4o model shared by the summarisation chains.
chatgpt = ChatOpenAI(model_name='gpt-4o', temperature=0)
docs = []
tables = []

# Artifacts produced by differentiator.py.
with open('RAG_MLM/docs.pkl', 'rb') as f:
    docs = pickle.load(f)

with open('RAG_MLM/table.pkl', 'rb') as f:
    tables = pickle.load(f)
28
+
29
def summarize_all():
    """Summarise every text chunk and table with GPT-4o for retrieval.

    Uses the module-level `docs`/`tables` lists; returns
    (text_summaries, table_summaries) as lists of strings.
    """
    # Prompt
    prompt_text = """
    You are an assistant tasked with summarizing tables and text particularly for semantic retrieval.
    These summaries will be embedded and used to retrieve the raw text or table elements
    Give a detailed summary of the table or text below that is well optimized for retrieval.
    For any tables also add in a one line description of what the table is about besides the summary.
    Do not add additional words like Summary: etc.
    Table or text chunk:
    {element}
    """
    prompt = ChatPromptTemplate.from_template(prompt_text)

    # Summary chain
    summarize_chain = (
        {"element": RunnablePassthrough()}
        |
        prompt
        |
        chatgpt
        |
        StrOutputParser()  # extracts response as text
    )

    # Initialize empty summaries
    text_summaries = []
    table_summaries = []

    text_docs = [doc.page_content for doc in docs]
    table_docs = [table.page_content for table in tables]

    # Batch with limited concurrency to stay under API rate limits.
    text_summaries = summarize_chain.batch(text_docs, {"max_concurrency": 5})
    table_summaries = summarize_chain.batch(table_docs, {"max_concurrency": 5})
    # NOTE(review): debug print; raises IndexError if fewer than 2 text docs.
    print(text_summaries[1])
    return text_summaries,table_summaries
64
+
65
def encode_image(image_path):
    """Read the file at *image_path* and return its base64 encoding as str."""
    with open(image_path, "rb") as image_file:
        raw = image_file.read()
    return base64.b64encode(raw).decode("utf-8")
69
# create a function to summarize the image by passing a prompt to GPT-4o
def image_summarize(img_base64, prompt):
    """Make image summary.

    Sends the base64 image (as a data URL) plus the text prompt to GPT-4o
    and returns the model's reply string.
    """
    chat = ChatOpenAI(model="gpt-4o", temperature=0)
    msg = chat.invoke(
        [
            HumanMessage(
                content=[
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url":
                                      f"data:image/jpeg;base64,{img_base64}"},
                    },
                ]
            )
        ]
    )
    return msg.content
88
+
89
def generate_img_summaries(path):
    """
    Generate summaries and base64 encoded strings for images.

    path: Path to a directory of .jpg files extracted by Unstructured.
    Returns (img_base64_list, image_summaries), index-aligned lists.
    """
    # Store base64 encoded images
    img_base64_list = []
    # Store image summaries
    image_summaries = []

    # Prompt
    prompt = """You are an assistant tasked with summarizing images for retrieval.
    Remember these images could potentially contain graphs, charts or
    tables also.
    These summaries will be embedded and used to retrieve the raw image
    for question answering.
    Give a detailed summary of the image that is well optimized for
    retrieval.
    Do not add additional words like Summary: etc.
    """

    # Apply to images (sorted for a deterministic order).
    for img_file in sorted(os.listdir(path)):
        if img_file.endswith(".jpg"):
            img_path = os.path.join(path, img_file)
            base64_image = encode_image(img_path)
            img_base64_list.append(base64_image)
            # One GPT-4o call per image.
            image_summaries.append(image_summarize(base64_image, prompt))
    return img_base64_list, image_summaries
118
+
119
def save_summary():
    """Summarise images, texts and tables and pickle all artifacts that
    embedder.py expects (img_base64_list, image_summaries, text_summaries,
    table_summaries)."""
    path = './figures'
    img_base64_list, image_summaries = generate_img_summaries(path)
    with open('RAG_MLM/img_base64_list.pkl', 'wb') as f:
        pickle.dump(img_base64_list, f)
    with open('RAG_MLM/image_summaries.pkl', 'wb') as f:
        pickle.dump(image_summaries, f)
    text_summaries,table_summaries = summarize_all()
    with open('RAG_MLM/text_summaries.pkl', 'wb') as f:
        pickle.dump(text_summaries, f)
    with open('RAG_MLM/table_summaries.pkl', 'wb') as f:
        pickle.dump(table_summaries, f)


# call this to save summary----
# Summarisation costs API calls, so skip it when the artifact exists.
file_path="RAG_MLM/text_summaries.pkl"
if os.path.exists(file_path):
    print(f"✅ File '{file_path}' found")
else:
    save_summary()
RAG_MLM/utility.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import base64
3
+ from langchain_core.documents import Document
4
+
5
# helps in detecting base64 encoded strings
def looks_like_base64(sb):
    """Return True when *sb* consists solely of base64-alphabet characters,
    optionally ending in up to two '=' padding characters."""
    return bool(re.match("^[A-Za-z0-9+/]+[=]{0,2}$", sb))
9
+
10
# helps in checking if the base64 encoded image is actually an image
def is_image_data(b64data):
    """
    Check whether *b64data* decodes to bytes starting with a known image
    file signature (magic number). Returns False on any decode error.
    """
    # Magic-number prefixes for the accepted formats.
    # NOTE(review): b"RIFF" is the generic RIFF container header, so this
    # also matches e.g. WAV/AVI payloads, not only WebP.
    signatures = (
        (b"\xff\xd8\xff", "jpg"),
        (b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a", "png"),
        (b"\x47\x49\x46\x38", "gif"),
        (b"\x52\x49\x46\x46", "webp"),
    )
    try:
        header = base64.b64decode(b64data)[:8]  # first 8 decoded bytes
    except Exception:
        return False
    return any(header.startswith(sig) for sig, _fmt in signatures)
29
+
30
# returns a dictionary separating images and text (with table) elements
def split_image_text_types(docs):
    """
    Split base64-encoded images and texts (with tables).

    Returns {"images": [...base64 strings...], "texts": [...strings...]}.

    NOTE(review): assumes each doc (or doc.page_content) is *bytes* --
    e.g. raw values returned from the Redis docstore; calling .decode on a
    plain str would raise AttributeError. Confirm against the retriever.
    """
    b64_images = []
    texts = []
    for doc in docs:
        # Check if the document is of type Document and extract page_content if so
        if isinstance(doc, Document):
            doc = doc.page_content.decode('utf-8')
        else:
            doc = doc.decode('utf-8')
        # Only strings that both look like base64 AND decode to image magic
        # bytes are treated as images; everything else is text/table context.
        if looks_like_base64(doc) and is_image_data(doc):
            b64_images.append(doc)
        else:
            texts.append(doc)
    return {"images": b64_images, "texts": texts}
48
+
49
def beautify_output(text_list):
    """Join retrieved text chunks and normalise them into readable,
    paragraph-separated sentences."""
    # Merge all chunks into one working string.
    combined = " ".join(text_list)

    # Clean-up passes, applied in order: (pattern, replacement).
    passes = (
        (r'\[\|\<\s*\d*\s*', ''),       # strip "[|<" artifacts and trailing digits
        (r'\n+', '\n\n'),               # collapse newline runs
        (r'\s+', ' '),                  # collapse all whitespace to single spaces
        (r'([.!?])\s*', r'\1\n\n'),     # paragraph break after sentence enders
        (r'(\n\n)\d+(\n\n)', r'\1'),    # drop stray standalone numbers
    )
    for pattern, repl in passes:
        combined = re.sub(pattern, repl, combined)

    # Strip leading/trailing whitespace left by the substitutions.
    return combined.strip()
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import Image
3
+ import random
4
+ from RAG_MLM import extractor as ex
5
+ from RAG_MLM import differentiator as dif
6
+ from RAG_MLM import summary as sm
7
+ from RAG_MLM import embedder as eb
8
+ from RAG_MLM import ragMLM as rag
9
+ from RAG_MLM import utility as ut
10
+ from PIL import Image
11
+ import base64
12
+ from io import BytesIO
13
+ import os
14
+
15
#----image utility-----
def plt_img_base64(img_base64):
    """Decode a base64-encoded string into a PIL Image.

    NOTE(review): duplicated in RAG_MLM/app.py and RAG_MLM/embedder.py;
    consider consolidating in RAG_MLM/utility.py.
    """
    # Decode the base64 string
    img_data = base64.b64decode(img_base64)
    # Create a BytesIO object
    img_buffer = BytesIO(img_data)
    # Open the image using PIL
    img = Image.open(img_buffer)
    return img
25
+
26
# Dummy text generation function
def generate_text(input_text):
    """Placeholder generator: echo the input prefixed with 'Echo: '."""
    return "Echo: {}".format(input_text)
29
+
30
# Dummy multiple images generation
def generate_images(n,imgList):
    """Return n random 200x200 placeholder images followed by the decoded
    base64 images from imgList (PIL Image objects)."""
    images = []
    # n solid-colour placeholders with random RGB values (demo filler).
    for _ in range(n):
        img = Image.new('RGB', (200, 200), color=(random.randint(0,255), random.randint(0,255), random.randint(0,255)))
        images.append(img)
    # Decode each base64 string retrieved as context into a PIL image.
    for item in imgList:
        img = plt_img_base64(item)
        images.append(img)
    return images
40
+
41
+
42
# Main processing function
# NOTE(review): dead code -- this placeholder is shadowed by the second
# `process_input` defined further down in this file before the UI is built.
def process_input(query):
    """Placeholder handler returning canned text/context/images."""
    response_text = f"Processed: {query}"
    context = "This is some dummy context."
    images = [["https://via.placeholder.com/150", "https://via.placeholder.com/150"]]
    return response_text, context, images
48
+
49
# Wrapper that runs the full offline pipeline: extract -> differentiate -> summarize.
def utility_function_wrapper(input_text=None):
    """Regenerate the pickled artifacts consumed by the RAG chain.

    input_text: accepted for Gradio wiring but currently unused; it now
    defaults to None so the handler also works when no input is passed.
    Returns a status string for the UI.
    """
    ex.extractor_text_image_table()
    # Bug fix: the original referenced the function without calling it
    # (missing parentheses), so the differentiation step silently never ran.
    dif.differentiate_table_text()
    sm.save_summary()
    # Typo fix: "sucess" -> "success".
    return "success:- generated files"
55
+
56
# Store the OpenAI API key for this process.
def save_api_key(api_key):
    """Persist the user-supplied API key into the process environment.

    SECURITY fix: the key is no longer printed to stdout/logs in clear text.
    Returns a status string for the UI.
    """
    os.environ["OPENAI_API_KEY"] = api_key
    return "✅ API Key saved in environment successfully!"
62
+
63
# Remove the OpenAI API key from this process's environment.
def clear_api_key():
    """Drop OPENAI_API_KEY from os.environ and report what happened."""
    if os.environ.pop("OPENAI_API_KEY", None) is not None:
        return "❌ API Key cleared from environment!"
    return "⚠️ No API Key found to clear."
70
# The function Gradio will call
def process_input(user_input):
    """Run the multimodal RAG chain for user_input.

    Returns (answer text, cleaned source text, list of PIL images).
    """
    #------calling llm------
    #docs = eb.retriever_multi_vector.invoke(user_input, limit=5)
    #r = ut.split_image_text_types(docs)
    response = rag.multimodal_rag_w_sources.invoke({'input': user_input})
    # Retrieved text chunks used as context, cleaned up for display.
    text_sources = response['context']['texts']
    text_sources = ut.beautify_output(text_sources)
    text_answer = response['answer']
    #text_answer = ut.beautify_output(text_answer)
    # Base64-encoded images retrieved as context.
    img_sources = response['context']['images']
    #---------end-----------

    #text_response = generate_text(user_input)
    # One random placeholder image is prepended before the retrieved images.
    image_responses = generate_images(1,img_sources)
    return text_answer,text_sources, image_responses
86
# Define Gradio interface
# Main UI: three tabs -- the RAG query app, offline pipeline utilities,
# and API-key configuration.
with gr.Blocks() as iface:
    with gr.Tab("Main App"):
        input_query = gr.Textbox(lines=2, placeholder="Enter your query here...")
        submit_button = gr.Button("Submit Query")
        response_text = gr.Textbox(label="Response Text")
        context = gr.Textbox(label="Context")
        gallery = gr.Gallery(label="Response Images", columns=[3], height="auto")

        submit_button.click(
            process_input,
            inputs=input_query,
            outputs=[response_text, context, gallery]
        )

    with gr.Tab("Utility Functions"):
        utility_input = gr.Textbox(lines=2, placeholder="Enter input for utility...")
        utility_button = gr.Button("Run Utility Function")
        utility_output = gr.Textbox(label="Utility Function Output")

        utility_button.click(
            utility_function_wrapper,
            # Bug fix: `inputs` was commented out while the handler takes a
            # positional argument, so clicking raised a TypeError at runtime.
            inputs=utility_input,
            outputs=utility_output
        )

    with gr.Tab("API Key Config"):
        api_key_input = gr.Textbox(type="password", placeholder="Enter your API key securely...")
        api_key_button = gr.Button("Save API Key")
        clear_api_key_button = gr.Button("Clear API Key")
        api_key_output = gr.Textbox(label="API Key Save Status")

        api_key_button.click(
            save_api_key,
            inputs=api_key_input,
            outputs=api_key_output
        )

        clear_api_key_button.click(
            clear_api_key,
            inputs=[],
            outputs=api_key_output
        )

# Launch
iface.launch()

#------
133
+
134
+ #------
135
+
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
langchain
langchain-openai
langchain-chroma
langchain-community
langchain-experimental
htmltabletomd
pdf2image
pillow
python-dotenv

unstructured[all-docs]
pdfminer
# install OCR dependencies for unstructured
pytesseract
# NOTE: poppler-utils is a system package (apt), not a pip package -- verify
# how it is installed in the deployment image.
poppler-utils
redis
gradio
resistest.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
import redis
import os

# Redis connectivity smoke test.
# SECURITY fix: the Upstash host and password were hard-coded here (and
# remain in git history -- rotate them). Supply the connection string via:
#   REDIS_URL="rediss://:<password>@<host>:6379"
redis_url = os.getenv("REDIS_URL")
if not redis_url:
    raise RuntimeError("REDIS_URL environment variable is not set")
r = redis.from_url(redis_url)

# Round-trip one key to prove the connection works.
r.set('foo', 'bar')
print(r.get('foo'))