# Agent_RAG2 — src/streamlit_app.py
# (Streamlit RAG app; header reconstructed from page-scrape residue.)
# 🫠 Clean and Final Streamlit RAG App (Three-Agent Architecture)
# --- Environment Setup (Safe for Hugging Face) ---
import os

# Hugging Face Spaces containers only allow writes under /tmp, so point
# every config/data/home path there before Streamlit is imported.
for _var in ("STREAMLIT_HOME", "XDG_CONFIG_HOME", "XDG_DATA_HOME", "HOME"):
    os.environ[_var] = "/tmp"

import asyncio

# Streamlit's script thread may not have an event loop; some async-backed
# libraries expect one, so create and install it when missing.
try:
    asyncio.get_running_loop()
except RuntimeError:
    asyncio.set_event_loop(asyncio.new_event_loop())
# --- Imports ---
import streamlit as st
import pandas as pd
import json
import io
from dotenv import load_dotenv, find_dotenv
from langchain_openai import OpenAIEmbeddings, OpenAI
from langchain_community.vectorstores import FAISS
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
import PyPDF2
from docx import Document
# --- Load API Key Securely ---
# Load .env first so os.getenv can see a locally-configured key.
_ = load_dotenv(find_dotenv())
# Prefer Streamlit secrets (Hugging Face / Streamlit Cloud). The original
# caught only st.runtime.secrets.StreamlitSecretNotFoundError — a non-public
# attribute path that can break between Streamlit versions, and one that
# misses the KeyError raised when a secrets file exists without this key.
# A broad except here is deliberate: any failure to read secrets should
# fall back to the environment.
try:
    API_KEY = st.secrets["OPENAI_API_KEY"]
except Exception:
    API_KEY = os.getenv("OPENAI_API_KEY")

embeddings_model = OpenAIEmbeddings(openai_api_key=API_KEY)
# --- Streamlit Page Setup ---
# Configure the browser-tab title and a centered single-column layout,
# then render the app header.
st.set_page_config(page_title="RAG File Chat", layout="centered")
st.title("🧠 Chat with Your Uploaded File")
# --- Agent 1: File Ingestion and Indexing ---
def _make_df_agent(df):
    """Wrap *df* in a LangChain pandas-dataframe agent using the shared key."""
    llm = OpenAI(openai_api_key=API_KEY)
    return create_pandas_dataframe_agent(llm, df, verbose=False)


def agent_alpha_file_uploader(file_content, file_type):
    """Load raw uploaded bytes and build the query backend for them.

    Parameters
    ----------
    file_content : bytes
        Raw content of the uploaded file.
    file_type : str
        Lower-cased file extension: csv, xlsx, json, pdf, or docx.

    Returns
    -------
    tuple
        ``(df_agent, vectorstore)`` — tabular types yield ``(agent, None)``,
        document types yield ``(None, FAISS store)``, anything else yields
        ``(None, None)`` after showing an error in the UI.
    """
    st.info("📂 Agent 1: Loading and indexing your file...")
    if file_type == "csv":
        df = pd.read_csv(io.StringIO(file_content.decode("utf-8")))
        return _make_df_agent(df), None
    elif file_type == "xlsx":
        # Wrap in BytesIO: passing raw bytes to read_excel is deprecated
        # in modern pandas; a file-like buffer works everywhere.
        df = pd.read_excel(io.BytesIO(file_content))
        return _make_df_agent(df), None
    elif file_type == "json":
        df = pd.DataFrame(json.loads(file_content.decode("utf-8")))
        return _make_df_agent(df), None
    elif file_type in ("pdf", "docx"):
        text = extract_text_from_file(file_content, file_type)
        chunks = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_text(text)
        vectorstore = FAISS.from_texts(chunks, embeddings_model)
        return None, vectorstore
    else:
        st.error("❌ Unsupported file type.")
        return None, None
# --- Agent 2: Query Resolution ---
def agent_beta_query_processor(query, file_type, df_agent=None, vectorstore=None):
    """Answer *query* against the previously indexed file.

    Document types (pdf/docx) are answered via a RetrievalQA "stuff" chain
    over the FAISS store (top-5 chunks); tabular types are delegated to the
    pandas dataframe agent.
    """
    st.info("🧠 Agent 2: Processing your question...")
    if file_type in ("pdf", "docx"):
        qa_chain = RetrievalQA.from_chain_type(
            llm=OpenAI(openai_api_key=API_KEY),
            chain_type="stuff",
            retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
        )
        # .invoke(...) replaces the deprecated Chain.__call__ API.
        result = qa_chain.invoke({"query": query})
        return result["result"]
    return df_agent.run(query)
# --- Agent 3: Response Enhancement ---
def agent_gamma_response_enhancer(response):
    """Ask the LLM to polish *response* for clarity and formatting.

    Returns the enhanced text produced by a second LLM pass.
    """
    st.info("🔍 Agent 3: Reviewing and enhancing the response...")
    enhancement_prompt = f"Improve the clarity and format of the following response:\n{response}"
    llm = OpenAI(openai_api_key=API_KEY)
    return llm.invoke(enhancement_prompt)
# --- Helper Function for Text Extraction ---
def extract_text_from_file(file_content, file_type):
    """Extract plain text from PDF or DOCX bytes.

    Parameters
    ----------
    file_content : bytes
        Raw file content.
    file_type : str
        Either "pdf" or "docx"; any other value returns "".

    Returns
    -------
    str
        Newline-joined non-empty pages (PDF) or paragraphs (DOCX).
    """
    if file_type == "pdf":
        reader = PyPDF2.PdfReader(io.BytesIO(file_content))
        # extract_text() is expensive — the original called it twice per
        # page (filter + join); call it once and reuse the result.
        page_texts = (page.extract_text() for page in reader.pages)
        return "\n".join(text for text in page_texts if text)
    elif file_type == "docx":
        doc = Document(io.BytesIO(file_content))
        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
    return ""
# --- Session State ---
# Seed every key this app relies on exactly once per session.
_SESSION_DEFAULTS = {
    "uploaded_file": None,
    "file_uploaded": False,
    "vectorstore": None,
    "agent": None,
    "file_type": None,
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# --- File Upload UI ---
MAX_SIZE_MB = 50
uploaded = st.file_uploader("📁 Browse and select a file", type=["csv", "xlsx", "json", "pdf", "docx"])
if uploaded:
    # One consolidated debug dump. The original printed the same debug log
    # twice and the metadata three times, and misused the deprecated
    # st.experimental_get_query_params() to read a user-agent that query
    # params never carry.
    st.write("🧾 Debug Upload Log:")
    st.code(f"Filename: {uploaded.name} | Type: {uploaded.type} | Size: {uploaded.size} bytes")
    st.write("📝 File metadata:")
    st.json({
        "streamlit_version": st.__version__,
        "name": uploaded.name,
        "type": uploaded.type,
        "size_bytes": uploaded.size,
        "size_kb": uploaded.size / 1024,
        "is_excel": uploaded.name.endswith((".xls", ".xlsx")),
    })
    # Reject oversized files before any processing.
    if uploaded.size > MAX_SIZE_MB * 1024 * 1024:
        st.error(f"❌ File too large. Maximum allowed size is {MAX_SIZE_MB}MB.")
        st.stop()
    st.session_state.uploaded_file = uploaded
    st.info(f"✅ File selected: `{uploaded.name}` ({uploaded.size / 1024:.1f} KB)")
# Process the selected file when the user confirms the upload.
if st.session_state.uploaded_file and st.button("📤 Upload File"):
    try:
        st.write("📑 Reading uploaded file content...")
        content = st.session_state.uploaded_file.read()
        st.write("🧪 File read complete. Size:", len(content), "bytes")
        # Parentheses make the conditional's precedence explicit: the whole
        # "bytes preview + ellipsis" is one branch, the text slice the other.
        preview = (str(content[:200]) + "...") if isinstance(content, bytes) else content[:500]
        st.code(preview)
        # File type is taken from the extension, lower-cased.
        ftype = st.session_state.uploaded_file.name.split(".")[-1].lower()
        with st.spinner("🔄 Agent Alpha (Uploader): Processing and indexing file..."):
            agent, vectorstore = agent_alpha_file_uploader(content, ftype)
        if agent or vectorstore:
            st.session_state.agent = agent
            st.session_state.vectorstore = vectorstore
            st.session_state.file_uploaded = True
            st.session_state.file_type = ftype
            st.success("✅ File processed successfully.")
        else:
            st.error("⚠️ Failed to process file.")
    except Exception as e:
        st.error("❌ Upload failed. Try a smaller file or check connection.")
        st.exception(e)
# --- Query UI ---
def _parse_table_rows(text):
    """Best-effort split of *text* into table rows.

    Prefers tab-delimited lines; if there are none (or only a single
    column), falls back to comma-delimited lines. Returns a list of row
    lists, possibly empty.
    """
    rows = [line.split("\t") for line in text.split("\n") if "\t" in line]
    if not rows or len(rows[0]) == 1:
        rows = [line.split(",") for line in text.split("\n") if "," in line]
    return rows


if st.session_state.file_uploaded:
    output_format = st.selectbox("📋 Select Output Format", ["Plain Text", "Markdown", "Tabular View"])
    query = st.text_area("🔍 Ask a question about your uploaded file")
    if st.button("Submit Query"):
        if not query.strip():
            st.warning("⚠️ Please enter a valid question.")
        else:
            # Agent 2 answers the question against the indexed file.
            with st.spinner("💡 Agent Beta (Processor): Handling your query..."):
                raw_response = agent_beta_query_processor(
                    query,
                    st.session_state.file_type,
                    df_agent=st.session_state.agent,
                    vectorstore=st.session_state.vectorstore,
                )
            # Agent 3 polishes the raw answer before display.
            with st.spinner("✨ Agent Gamma (Reviewer): Enhancing the response..."):
                enhanced_response = agent_gamma_response_enhancer(raw_response)
            st.subheader("📌 Final Answer")
            if output_format == "Plain Text":
                st.text(enhanced_response)
            elif output_format == "Markdown":
                st.markdown(enhanced_response)
            elif output_format == "Tabular View":
                rows = _parse_table_rows(enhanced_response)
                # rows[0] on an empty list raises IndexError — caught below,
                # falling back to the raw-text rendering.
                try:
                    df = pd.DataFrame(rows[1:], columns=rows[0])
                    st.dataframe(df)
                except Exception:
                    st.warning("⚠️ Could not render table. Showing raw text.")
                    st.text(enhanced_response)