# multimodalRAG / app.py
# sunny333's picture
# initial commit
# 568cd7b
import gradio as gr
from PIL import Image
import random
from RAG_MLM import extractor as ex
from RAG_MLM import differentiator as dif
from RAG_MLM import summary as sm
from RAG_MLM import embedder as eb
from RAG_MLM import ragMLM as rag
from RAG_MLM import utility as ut
from PIL import Image
import base64
from io import BytesIO
import os
#----image utility-----
def plt_img_base64(img_base64):
    """Decode a base64-encoded image and open it with PIL.

    Args:
        img_base64: Base64 representation of an encoded image file.

    Returns:
        The object produced by ``Image.open`` for the decoded bytes.
    """
    raw_bytes = base64.b64decode(img_base64)
    # Image.open expects a file-like object, so wrap the bytes in memory.
    return Image.open(BytesIO(raw_bytes))
# Dummy text generation function
def generate_text(input_text):
    """Return *input_text* prefixed with ``"Echo: "`` (placeholder generator)."""
    echoed = f"Echo: {input_text}"
    return echoed
# Dummy multiple images generation
def generate_images(n, imgList):
    """Build a gallery list: *n* random solid-colour tiles, then decoded images.

    Args:
        n: Number of 200x200 placeholder tiles to create.
        imgList: Iterable of base64-encoded images; each one is decoded with
            ``plt_img_base64``.

    Returns:
        A list containing the placeholder tiles followed by the decoded
        images, in the order they appear in *imgList*.
    """
    gallery = [
        Image.new(
            'RGB',
            (200, 200),
            # Three independent channel values, drawn in R, G, B order.
            color=tuple(random.randint(0, 255) for _ in range(3)),
        )
        for _ in range(n)
    ]
    gallery.extend(plt_img_base64(encoded) for encoded in imgList)
    return gallery
# Placeholder processing function (replaced by the later process_input definition below)
def process_input(query):
    """Return canned placeholder output for *query*.

    NOTE: this file defines ``process_input`` a second time further down;
    that later definition is the one bound to the name once the module has
    finished loading.

    Args:
        query: Value interpolated into the response text.

    Returns:
        A 3-tuple ``(response_text, context, images)`` where ``images`` is a
        single-row list holding two placeholder image URLs.
    """
    placeholder_url = "https://via.placeholder.com/150"
    return (
        f"Processed: {query}",
        "This is some dummy context.",
        [[placeholder_url, placeholder_url]],
    )
# Wrapper for utility function
def utility_function_wrapper(input_text=None):
    """Run the extraction, differentiation and summary steps in order.

    Args:
        input_text: Unused. It is optional so the handler can be invoked both
            with a value and with no arguments at all (the Gradio click event
            in this file is registered without ``inputs``).

    Returns:
        A fixed status string once all three steps have returned.
    """
    ex.extractor_text_image_table()
    # The original statement only named this function without calling it, so
    # the step never ran.
    # NOTE(review): called without arguments like the neighbouring steps —
    # confirm against the signature in RAG_MLM.differentiator.
    dif.differentiate_table_text()
    sm.save_summary()
    return "sucess:- generated files"
# API key handler: stores the key in the OPENAI_API_KEY environment variable
def save_api_key(api_key):
    """Store the given API key in ``OPENAI_API_KEY`` for this process.

    The key value is never written to stdout, so it cannot leak into console
    or hosting logs.

    Args:
        api_key: Key text entered by the user. Surrounding whitespace (such
            as a newline from a paste) is removed before storing.

    Returns:
        A status message: a success notice when the key was stored, or a
        warning when no usable key was provided (in which case the
        environment is left untouched).
    """
    cleaned_key = (api_key or "").strip()
    if not cleaned_key:
        # An empty value would only defer the failure to the first API call.
        return "⚠️ No API Key provided."
    # Log the event only; the secret itself must not be printed.
    print("Received API Key (value hidden).")
    os.environ["OPENAI_API_KEY"] = cleaned_key
    return "✅ API Key saved in environment successfully!"
# Function to clear API Key from environment
def clear_api_key():
    """Remove ``OPENAI_API_KEY`` from the process environment, if present.

    Returns:
        A status message stating whether a key was removed or none was set.
    """
    if "OPENAI_API_KEY" not in os.environ:
        return "⚠️ No API Key found to clear."
    os.environ.pop("OPENAI_API_KEY")
    return "❌ API Key cleared from environment!"
# The function Gradio will call
def process_input(user_input):
    """Answer *user_input* with the RAG chain and gather its sources.

    Args:
        user_input: Query text forwarded to the chain under the ``'input'`` key.

    Returns:
        A 3-tuple ``(answer, text_sources, images)``: the chain's answer, the
        retrieved text sources after ``ut.beautify_output``, and the image
        list produced by ``generate_images``.
    """
    rag_result = rag.multimodal_rag_w_sources.invoke({'input': user_input})

    formatted_sources = ut.beautify_output(rag_result['context']['texts'])
    answer = rag_result['answer']
    encoded_images = rag_result['context']['images']

    # One random placeholder tile is placed ahead of the decoded source images.
    gallery_images = generate_images(1, encoded_images)
    return answer, formatted_sources, gallery_images
# Define Gradio interface
# Main UI
# Three-tab Gradio UI: querying, the preprocessing utility, and API key setup.
with gr.Blocks() as iface:
    with gr.Tab("Main App"):
        input_query = gr.Textbox(lines=2, placeholder="Enter your query here...")
        submit_button = gr.Button("Submit Query")
        response_text = gr.Textbox(label="Response Text")
        context = gr.Textbox(label="Context")
        gallery = gr.Gallery(label="Response Images", columns=[3], height="auto")
        # process_input returns (answer, text sources, images) in this order.
        submit_button.click(
            process_input,
            inputs=input_query,
            outputs=[response_text, context, gallery],
        )
    with gr.Tab("Utility Functions"):
        utility_input = gr.Textbox(lines=2, placeholder="Enter input for utility...")
        utility_button = gr.Button("Run Utility Function")
        utility_output = gr.Textbox(label="Utility Function Output")
        # The handler takes one positional argument, so the textbox has to be
        # wired in; registering the event without inputs calls it with none.
        utility_button.click(
            utility_function_wrapper,
            inputs=utility_input,
            outputs=utility_output,
        )
    with gr.Tab("API Key Config"):
        api_key_input = gr.Textbox(type="password", placeholder="Enter your API key securely...")
        api_key_button = gr.Button("Save API Key")
        clear_api_key_button = gr.Button("Clear API Key")
        api_key_output = gr.Textbox(label="API Key Save Status")
        # Both buttons report their result in the same status box.
        api_key_button.click(
            save_api_key,
            inputs=api_key_input,
            outputs=api_key_output,
        )
        clear_api_key_button.click(
            clear_api_key,
            inputs=[],
            outputs=api_key_output,
        )
# Launch
iface.launch()
#------