Spaces:
Build error
Build error
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +21 -23
src/streamlit_app.py
CHANGED
|
@@ -1,41 +1,39 @@
|
|
| 1 |
-
#
|
| 2 |
import os
|
| 3 |
os.environ["STREAMLIT_HOME"] = "/tmp"
|
| 4 |
os.environ["XDG_CONFIG_HOME"] = "/tmp"
|
| 5 |
os.environ["XDG_DATA_HOME"] = "/tmp"
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
import streamlit as st
|
| 8 |
import pandas as pd
|
| 9 |
import json
|
| 10 |
import io
|
| 11 |
|
| 12 |
-
|
| 13 |
-
from
|
| 14 |
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
|
| 15 |
from langchain.text_splitter import CharacterTextSplitter
|
| 16 |
-
from langchain.embeddings import OpenAIEmbeddings
|
| 17 |
-
from langchain.vectorstores import FAISS
|
| 18 |
from langchain.chains import RetrievalQA
|
| 19 |
-
from langchain_community.vectorstores import FAISS
|
| 20 |
-
from langchain_openai import OpenAIEmbeddings
|
| 21 |
|
| 22 |
import PyPDF2
|
| 23 |
from docx import Document
|
| 24 |
|
| 25 |
-
# Load
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
_ = load_dotenv(find_dotenv())
|
| 30 |
-
|
| 31 |
-
# Get API key from Streamlit secrets
|
| 32 |
-
API_KEY = os.getenv("OPENAI_API_KEY")
|
| 33 |
|
| 34 |
-
# Streamlit
|
| 35 |
st.set_page_config(page_title="RAG File Chat", layout="centered")
|
| 36 |
st.title("🧠 Chat with Your Uploaded File")
|
| 37 |
|
| 38 |
-
#
|
| 39 |
if "uploaded_file" not in st.session_state:
|
| 40 |
st.session_state.uploaded_file = None
|
| 41 |
if "file_uploaded" not in st.session_state:
|
|
@@ -80,9 +78,9 @@ def create_agent_and_index(file_content, file_type):
|
|
| 80 |
elif file_type in ["pdf", "docx"]:
|
| 81 |
text = extract_text_from_file(file_content, file_type)
|
| 82 |
st.success(f"π Extracted text from {file_type.upper()}.")
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
st.session_state.vectorstore = FAISS.from_texts(
|
| 86 |
st.success("🧠 Embedded text and stored in FAISS.")
|
| 87 |
else:
|
| 88 |
st.error("β Unsupported file type.")
|
|
@@ -91,7 +89,7 @@ def create_agent_and_index(file_content, file_type):
|
|
| 91 |
st.session_state.file_type = file_type
|
| 92 |
|
| 93 |
|
| 94 |
-
# π Upload
|
| 95 |
uploaded = st.file_uploader("π Browse and select a file", type=["csv", "xlsx", "json", "pdf", "docx"])
|
| 96 |
if uploaded:
|
| 97 |
st.session_state.uploaded_file = uploaded
|
|
@@ -103,7 +101,7 @@ if st.session_state.uploaded_file and st.button("📤 Upload File"):
|
|
| 103 |
with st.spinner("π Uploading and processing..."):
|
| 104 |
create_agent_and_index(file_content, file_type)
|
| 105 |
|
| 106 |
-
# 💬 Query
|
| 107 |
if st.session_state.file_uploaded:
|
| 108 |
output_format = st.selectbox("π Select Output Format", ["Plain Text", "Markdown", "Tabular View"])
|
| 109 |
query = st.text_area("💬 Ask a question about your uploaded file")
|
|
@@ -138,5 +136,5 @@ if st.session_state.file_uploaded:
|
|
| 138 |
df = pd.DataFrame(rows[1:], columns=rows[0])
|
| 139 |
st.dataframe(df)
|
| 140 |
except Exception:
|
| 141 |
-
st.warning("⚠️ Could not render
|
| 142 |
st.text(response)
|
|
|
|
| 1 |
+
# Hugging Face-compatible environment fixes
|
| 2 |
import os
|
| 3 |
os.environ["STREAMLIT_HOME"] = "/tmp"
|
| 4 |
os.environ["XDG_CONFIG_HOME"] = "/tmp"
|
| 5 |
os.environ["XDG_DATA_HOME"] = "/tmp"
|
| 6 |
|
| 7 |
+
# Fix async loop crash in HF Spaces
|
| 8 |
+
import asyncio
|
| 9 |
+
try:
|
| 10 |
+
asyncio.get_running_loop()
|
| 11 |
+
except RuntimeError:
|
| 12 |
+
asyncio.set_event_loop(asyncio.new_event_loop())
|
| 13 |
+
|
| 14 |
import streamlit as st
|
| 15 |
import pandas as pd
|
| 16 |
import json
|
| 17 |
import io
|
| 18 |
|
| 19 |
+
from langchain_openai import OpenAIEmbeddings, OpenAI
|
| 20 |
+
from langchain_community.vectorstores import FAISS
|
| 21 |
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
|
| 22 |
from langchain.text_splitter import CharacterTextSplitter
|
|
|
|
|
|
|
| 23 |
from langchain.chains import RetrievalQA
|
|
|
|
|
|
|
| 24 |
|
| 25 |
import PyPDF2
|
| 26 |
from docx import Document
|
| 27 |
|
| 28 |
+
# Load API key securely
|
| 29 |
+
API_KEY = st.secrets["OPENAI_API_KEY"]
|
| 30 |
+
embeddings_model = OpenAIEmbeddings(openai_api_key=API_KEY)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
+
# Streamlit settings
|
| 33 |
st.set_page_config(page_title="RAG File Chat", layout="centered")
|
| 34 |
st.title("🧠 Chat with Your Uploaded File")
|
| 35 |
|
| 36 |
+
# Session state init
|
| 37 |
if "uploaded_file" not in st.session_state:
|
| 38 |
st.session_state.uploaded_file = None
|
| 39 |
if "file_uploaded" not in st.session_state:
|
|
|
|
| 78 |
elif file_type in ["pdf", "docx"]:
|
| 79 |
text = extract_text_from_file(file_content, file_type)
|
| 80 |
st.success(f"π Extracted text from {file_type.upper()}.")
|
| 81 |
+
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
| 82 |
+
chunks = splitter.split_text(text)
|
| 83 |
+
st.session_state.vectorstore = FAISS.from_texts(chunks, embeddings_model)
|
| 84 |
st.success("🧠 Embedded text and stored in FAISS.")
|
| 85 |
else:
|
| 86 |
st.error("β Unsupported file type.")
|
|
|
|
| 89 |
st.session_state.file_type = file_type
|
| 90 |
|
| 91 |
|
| 92 |
+
# π Upload UI
|
| 93 |
uploaded = st.file_uploader("π Browse and select a file", type=["csv", "xlsx", "json", "pdf", "docx"])
|
| 94 |
if uploaded:
|
| 95 |
st.session_state.uploaded_file = uploaded
|
|
|
|
| 101 |
with st.spinner("π Uploading and processing..."):
|
| 102 |
create_agent_and_index(file_content, file_type)
|
| 103 |
|
| 104 |
+
# 💬 Query UI
|
| 105 |
if st.session_state.file_uploaded:
|
| 106 |
output_format = st.selectbox("π Select Output Format", ["Plain Text", "Markdown", "Tabular View"])
|
| 107 |
query = st.text_area("💬 Ask a question about your uploaded file")
|
|
|
|
| 136 |
df = pd.DataFrame(rows[1:], columns=rows[0])
|
| 137 |
st.dataframe(df)
|
| 138 |
except Exception:
|
| 139 |
+
st.warning("⚠️ Could not render table. Showing raw text.")
|
| 140 |
st.text(response)
|