Spaces:

PRSHNTKUMR
/

Agent_RAG2

Build error

App Files Files Community

PRSHNTKUMR committed on May 6, 2025

Commit

ff7e11e

verified ·

1 Parent(s): 329e671

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +102 -38

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,104 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import streamlit as st
+import pandas as pd
+import json
+import io
+import os
+from langchain.llms import OpenAI
+from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import Chroma
+from langchain.chains import RetrievalQA
+import PyPDF2
+from docx import Document
+from dotenv import load_dotenv, find_dotenv
+# Load variables from a .env file (if one is found) into the process environment.
+_ = load_dotenv(find_dotenv())
+# Read the OpenAI API key from the environment (os.getenv), which now also
+# includes anything load_dotenv() picked up above.
+API_KEY = os.getenv("OPENAI_API_KEY")
+if not API_KEY:
+    # Stop early with an actionable message rather than passing None into the
+    # OpenAI clients constructed below.
+    st.error("OPENAI_API_KEY is not set. Add it to the environment or a .env file.")
+    st.stop()
+# Initialize embedding model and vector store in memory (no disk persistence)
+embeddings_model = OpenAIEmbeddings(openai_api_key=API_KEY)
+vectorstore = Chroma(embedding_function=embeddings_model)
+# Session flags: agent_created records whether create_agent() has already run
+# in this browser session.
+if "agent_created" not in st.session_state:
+    st.session_state.agent_created = False
+def create_agent(file_content, file_type):
+    """Build a pandas DataFrame agent from an uploaded file and index its text.
+
+    Args:
+        file_content: Raw bytes of the uploaded file.
+        file_type: File extension without the dot ("csv", "xlsx", "json",
+            "pdf" or "docx"); matched case-insensitively.
+
+    Returns:
+        The agent produced by ``create_pandas_dataframe_agent`` for the
+        DataFrame built from the file.
+
+    Raises:
+        ValueError: If ``file_type`` is not one of the supported extensions.
+
+    Only PDF and DOCX content is added to the module-level ``vectorstore``;
+    tabular formats (CSV, XLSX, JSON) are loaded into the DataFrame but are
+    not indexed for retrieval.
+    """
+    file_type = file_type.lower()
+    texts = None
+    if file_type == "csv":
+        df = pd.read_csv(io.StringIO(file_content.decode("utf-8")), header=0)
+    elif file_type == "xlsx":
+        # Passing raw bytes to read_excel is deprecated in recent pandas
+        # releases; a file-like wrapper works on old and new versions alike.
+        df = pd.read_excel(io.BytesIO(file_content), header=0)
+    elif file_type == "json":
+        df = pd.DataFrame(json.loads(file_content.decode("utf-8")))
+    elif file_type in ("pdf", "docx"):
+        text = extract_text_from_file(file_content, file_type)
+        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+        texts = text_splitter.split_text(text)
+        df = pd.DataFrame({"text": texts})
+    else:
+        raise ValueError(f"Unsupported file type: {file_type}")
+    # Add text chunks to vectorstore. Skipped when no text could be extracted,
+    # so the store is never asked to index an empty batch.
+    if texts:
+        vectorstore.add_texts(
+            texts=texts,
+            metadatas=[{"source": file_type} for _ in texts],
+        )
+    llm = OpenAI(openai_api_key=API_KEY)
+    return create_pandas_dataframe_agent(llm, df, verbose=False)
+def extract_text_from_file(file_content, file_type):
+    """Extract raw text from supported document formats.
+
+    Args:
+        file_content: Raw bytes of the document.
+        file_type: "pdf" or "docx"; any other value yields an empty string.
+
+    Returns:
+        The text of all non-empty PDF pages, or of all non-blank DOCX
+        paragraphs, joined with newlines. ``""`` for unsupported types.
+    """
+    if file_type == "pdf":
+        reader = PyPDF2.PdfReader(io.BytesIO(file_content))
+        # Run extract_text() once per page: the previous version called it
+        # twice (once to filter, once to collect), doubling the parsing work.
+        page_texts = (page.extract_text() for page in reader.pages)
+        return "\n".join(text for text in page_texts if text)
+    if file_type == "docx":
+        doc = Document(io.BytesIO(file_content))
+        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
+    return ""
+def query_agent(query):
+    """Answer ``query`` via retrieval-augmented generation over the vectorstore.
+
+    Builds a "stuff" RetrievalQA chain on each call, backed by the
+    module-level ``vectorstore`` (retriever search kwargs ``k=5``) and an
+    OpenAI LLM, and returns the chain's "result" field.
+
+    Note: despite the name, this function only consults the vectorstore; it
+    does not use the pandas agent returned by ``create_agent``.
+    """
+    llm = OpenAI(openai_api_key=API_KEY)
+    retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
+    chain = RetrievalQA.from_chain_type(
+        llm=llm,
+        chain_type="stuff",
+        retriever=retriever,
+    )
+    answer = chain({"query": query})
+    return answer["result"]
+# --- Streamlit UI ---
+# Flow: upload a file, type a question, and on submit index the file (once per
+# distinct upload) before answering the question through query_agent().
+st.set_page_config(page_title="RAG from Upload", layout="centered")
+st.title("🧠 Chat with Your File")
+uploaded_file = st.file_uploader("Upload a file", type=["csv", "xlsx", "json", "pdf", "docx"])
+if uploaded_file is not None:
+    # getvalue() returns the whole buffer regardless of the current stream
+    # position, unlike read().
+    file_content = uploaded_file.getvalue()
+    # Lower-case the extension so names such as "REPORT.PDF" are accepted.
+    file_type = uploaded_file.name.rsplit(".", 1)[-1].lower()
+    # Identify the upload so that switching to a different file triggers
+    # indexing again instead of reusing the stale "agent_created" flag.
+    file_key = (uploaded_file.name, len(file_content))
+    query = st.text_area("Enter your query")
+    if st.button("Submit Query", type="primary"):
+        if not query.strip():
+            st.warning("Please enter a valid query.")
+            st.stop()
+        already_indexed = (
+            st.session_state.get("agent_created", False)
+            and st.session_state.get("indexed_file") == file_key
+        )
+        if not already_indexed:
+            # NOTE(review): chunks from a previously uploaded file are not
+            # removed from the vectorstore here — confirm whether the store
+            # should be reset when the file changes.
+            create_agent(file_content, file_type)
+            st.session_state.agent_created = True
+            st.session_state.indexed_file = file_key
+            st.success("Data loaded and indexed.")
+        response = query_agent(query)
+        st.subheader("📌 Answer")
+        st.write(response)