# 🫠 Clean and Final Streamlit RAG App (Three-Agent Architecture)

# --- Environment Setup (Safe for Hugging Face) ---
# Hugging Face Spaces runs with a read-only home directory; point every
# config/data path at /tmp so Streamlit can write its state files.
import os

os.environ["STREAMLIT_HOME"] = "/tmp"
os.environ["XDG_CONFIG_HOME"] = "/tmp"
os.environ["XDG_DATA_HOME"] = "/tmp"
os.environ["HOME"] = "/tmp"

# Some langchain/streamlit code paths expect an event loop to exist in the
# current thread; install one if there is none running.
import asyncio

try:
    asyncio.get_running_loop()
except RuntimeError:
    asyncio.set_event_loop(asyncio.new_event_loop())

# --- Imports ---
import io
import json

import pandas as pd
import streamlit as st
from dotenv import load_dotenv, find_dotenv
from langchain_openai import OpenAIEmbeddings, OpenAI
from langchain_community.vectorstores import FAISS
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
import PyPDF2
from docx import Document

# --- Load API Key Securely ---
_ = load_dotenv(find_dotenv())
try:
    API_KEY = st.secrets["OPENAI_API_KEY"]
except Exception:
    # st.secrets raises different exception types across Streamlit versions
    # (StreamlitSecretNotFoundError, KeyError, FileNotFoundError) when no
    # secrets file / key exists, and the previous private attribute path
    # (st.runtime.secrets.*) is not stable API -- fall back to the
    # environment in every failure case.
    API_KEY = os.getenv("OPENAI_API_KEY")

embeddings_model = OpenAIEmbeddings(openai_api_key=API_KEY)

# --- Streamlit Page Setup ---
st.set_page_config(page_title="RAG File Chat", layout="centered")
st.title("🧠 Chat with Your Uploaded File")


def _make_dataframe_agent(df):
    """Build a pandas-dataframe agent over *df* (shared by csv/xlsx/json paths)."""
    llm = OpenAI(openai_api_key=API_KEY)
    # NOTE(review): recent langchain-experimental releases require
    # allow_dangerous_code=True here because the agent executes generated
    # Python -- confirm against the pinned langchain version before upgrading.
    return create_pandas_dataframe_agent(llm, df, verbose=False)


# --- Agent 1: File Ingestion and Indexing ---
def agent_alpha_file_uploader(file_content, file_type):
    """Index raw file bytes for later querying.

    Parameters:
        file_content: the uploaded file's raw ``bytes``.
        file_type: lowercase extension ("csv", "xlsx", "json", "pdf", "docx").

    Returns:
        ``(df_agent, vectorstore)`` -- tabular files yield a dataframe agent
        (vectorstore is None); pdf/docx yield a FAISS vectorstore (agent is
        None); unsupported types yield ``(None, None)`` after an st.error.
    """
    st.info("📂 Agent 1: Loading and indexing your file...")
    if file_type == "csv":
        df = pd.read_csv(io.StringIO(file_content.decode("utf-8")))
        return _make_dataframe_agent(df), None
    elif file_type == "xlsx":
        # read_excel needs a file-like object: passing raw bytes directly is
        # deprecated/removed in recent pandas, so wrap them in BytesIO.
        df = pd.read_excel(io.BytesIO(file_content))
        return _make_dataframe_agent(df), None
    elif file_type == "json":
        df = pd.DataFrame(json.loads(file_content.decode("utf-8")))
        return _make_dataframe_agent(df), None
    elif file_type in ["pdf", "docx"]:
        text = extract_text_from_file(file_content, file_type)
        chunks = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_text(text)
        vectorstore = FAISS.from_texts(chunks, embeddings_model)
        return None, vectorstore
    else:
        st.error("❌ Unsupported file type.")
        return None, None


# --- Agent 2: Query Resolution ---
def agent_beta_query_processor(query, file_type, df_agent=None, vectorstore=None):
    """Answer *query* against the index built by agent alpha.

    pdf/docx files go through a RetrievalQA chain over *vectorstore*;
    tabular files are delegated to the pandas dataframe *df_agent*.
    """
    st.info("🧠 Agent 2: Processing your question...")
    if file_type in ["pdf", "docx"]:
        qa_chain = RetrievalQA.from_chain_type(
            llm=OpenAI(openai_api_key=API_KEY),
            chain_type="stuff",
            retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
        )
        # .invoke() is the supported call style; Chain.__call__ is deprecated.
        result = qa_chain.invoke({"query": query})
        return result["result"]
    else:
        return df_agent.run(query)


# --- Agent 3: Response Enhancement ---
def agent_gamma_response_enhancer(response):
    """Run *response* back through the LLM to improve clarity and formatting."""
    st.info("🔍 Agent 3: Reviewing and enhancing the response...")
    enhancement_prompt = f"Improve the clarity and format of the following response:\n{response}"
    llm = OpenAI(openai_api_key=API_KEY)
    return llm.invoke(enhancement_prompt)


# --- Helper Function for Text Extraction ---
def extract_text_from_file(file_content, file_type):
    """Return plain text extracted from pdf/docx bytes; '' for other types."""
    if file_type == "pdf":
        reader = PyPDF2.PdfReader(io.BytesIO(file_content))
        # extract_text() is expensive -- call it once per page (walrus),
        # skipping pages that yield no text.
        return "\n".join([t for page in reader.pages if (t := page.extract_text())])
    elif file_type == "docx":
        doc = Document(io.BytesIO(file_content))
        return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
    return ""


# --- Session State ---
# Seed every key this app reads so later accesses never KeyError on rerun.
_STATE_DEFAULTS = {
    "uploaded_file": None,
    "file_uploaded": False,
    "vectorstore": None,
    "agent": None,
    "file_type": None,
}
for _key, _default in _STATE_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

# --- File Upload UI ---
MAX_SIZE_MB = 50
uploaded = st.file_uploader(
    "📁 Browse and select a file", type=["csv", "xlsx", "json", "pdf", "docx"]
)
if uploaded:
    st.write("🧾 Debug Upload Log:")
    st.code(f"Filename: {uploaded.name} | Type: {uploaded.type} | Size: {uploaded.size} bytes")
    st.write("📥 Upload Raw Details (client-side and browser environment check):")
    # Fixed: the previous "user_agent" entry used the deprecated
    # st.experimental_get_query_params(), and URL query params never carry
    # the browser's user agent anyway, so the entry was removed.
    st.json({
        "streamlit_version": st.__version__,
        "name": uploaded.name,
        "type": uploaded.type,
        "size_bytes": uploaded.size,
        "is_excel": uploaded.name.endswith(('.xls', '.xlsx')),
        "is_binary": isinstance(uploaded, io.BytesIO),
    })
    # (A second, verbatim-duplicate "Debug Upload Log" block was removed here.)
    st.write("📝 File metadata:")
    st.json({"name": uploaded.name, "size_kb": uploaded.size / 1024, "type": uploaded.type})
    if uploaded.size > MAX_SIZE_MB * 1024 * 1024:
        st.error(f"❌ File too large. Maximum allowed size is {MAX_SIZE_MB}MB.")
        st.stop()
    st.session_state.uploaded_file = uploaded
    st.info(f"✅ File selected: `{uploaded.name}` ({uploaded.size / 1024:.1f} KB)")

if st.session_state.uploaded_file and st.button("📤 Upload File"):
    try:
        st.write("📡 Reading uploaded file content...")
        content = st.session_state.uploaded_file.read()
        st.write("🧪 File read complete. Size:", len(content), "bytes")
        st.code(str(content[:200]) + "..." if isinstance(content, bytes) else content[:500])
        ftype = st.session_state.uploaded_file.name.split(".")[-1].lower()
        with st.spinner("🔄 Agent Alpha (Uploader): Processing and indexing file..."):
            agent, vectorstore = agent_alpha_file_uploader(content, ftype)
        if agent or vectorstore:
            st.session_state.agent = agent
            st.session_state.vectorstore = vectorstore
            st.session_state.file_uploaded = True
            st.session_state.file_type = ftype
            st.success("✅ File processed successfully.")
        else:
            st.error("⚠️ Failed to process file.")
    except Exception as e:
        st.error("❌ Upload failed. Try a smaller file or check connection.")
        st.exception(e)

# --- Query UI ---
if st.session_state.file_uploaded:
    output_format = st.selectbox("📋 Select Output Format", ["Plain Text", "Markdown", "Tabular View"])
    query = st.text_area("🔍 Ask a question about your uploaded file")
    if st.button("Submit Query"):
        if not query.strip():
            st.warning("⚠️ Please enter a valid question.")
        else:
            with st.spinner("💡 Agent Beta (Processor): Handling your query..."):
                raw_response = agent_beta_query_processor(
                    query,
                    st.session_state.file_type,
                    df_agent=st.session_state.agent,
                    vectorstore=st.session_state.vectorstore,
                )
            with st.spinner("✨ Agent Gamma (Reviewer): Enhancing the response..."):
                enhanced_response = agent_gamma_response_enhancer(raw_response)
            st.subheader("📌 Final Answer")
            if output_format == "Plain Text":
                st.text(enhanced_response)
            elif output_format == "Markdown":
                st.markdown(enhanced_response)
            elif output_format == "Tabular View":
                # Prefer tab-separated rows; fall back to comma-separated.
                rows = [line.split("\t") for line in enhanced_response.split("\n") if "\t" in line]
                if not rows or len(rows[0]) == 1:
                    rows = [line.split(",") for line in enhanced_response.split("\n") if "," in line]
                try:
                    # rows[0] raising IndexError (no delimited lines at all)
                    # is caught below and degrades to raw text.
                    df = pd.DataFrame(rows[1:], columns=rows[0])
                    st.dataframe(df)
                except Exception:
                    st.warning("⚠️ Could not render table. Showing raw text.")
                    st.text(enhanced_response)