PRSHNTKUMR committed on
Commit
2189cf4
·
verified ·
1 Parent(s): 56e1806

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +21 -23
src/streamlit_app.py CHANGED
@@ -1,41 +1,39 @@
1
- # Fix permission error on Hugging Face
2
  import os
3
  os.environ["STREAMLIT_HOME"] = "/tmp"
4
  os.environ["XDG_CONFIG_HOME"] = "/tmp"
5
  os.environ["XDG_DATA_HOME"] = "/tmp"
6
 
 
 
 
 
 
 
 
7
  import streamlit as st
8
  import pandas as pd
9
  import json
10
  import io
11
 
12
-
13
- from langchain.llms import OpenAI
14
  from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
15
  from langchain.text_splitter import CharacterTextSplitter
16
- from langchain.embeddings import OpenAIEmbeddings
17
- from langchain.vectorstores import FAISS
18
  from langchain.chains import RetrievalQA
19
- from langchain_community.vectorstores import FAISS
20
- from langchain_openai import OpenAIEmbeddings
21
 
22
  import PyPDF2
23
  from docx import Document
24
 
25
- # Load OpenAI API Key
26
-
27
- from dotenv import load_dotenv, find_dotenv
28
-
29
- _ = load_dotenv(find_dotenv())
30
-
31
- # Get API key from Streamlit secrets
32
- API_KEY = os.getenv("OPENAI_API_KEY")
33
 
34
- # Streamlit app settings
35
  st.set_page_config(page_title="RAG File Chat", layout="centered")
36
  st.title("🧠 Chat with Your Uploaded File")
37
 
38
- # Initialize session state
39
  if "uploaded_file" not in st.session_state:
40
  st.session_state.uploaded_file = None
41
  if "file_uploaded" not in st.session_state:
@@ -80,9 +78,9 @@ def create_agent_and_index(file_content, file_type):
80
  elif file_type in ["pdf", "docx"]:
81
  text = extract_text_from_file(file_content, file_type)
82
  st.success(f"πŸ“ƒ Extracted text from {file_type.upper()}.")
83
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
84
- texts = text_splitter.split_text(text)
85
- st.session_state.vectorstore = FAISS.from_texts(texts, embeddings_model)
86
  st.success("🧠 Embedded text and stored in FAISS.")
87
  else:
88
  st.error("❌ Unsupported file type.")
@@ -91,7 +89,7 @@ def create_agent_and_index(file_content, file_type):
91
  st.session_state.file_type = file_type
92
 
93
 
94
- # 📁 Upload Section
95
  uploaded = st.file_uploader("πŸ“ Browse and select a file", type=["csv", "xlsx", "json", "pdf", "docx"])
96
  if uploaded:
97
  st.session_state.uploaded_file = uploaded
@@ -103,7 +101,7 @@ if st.session_state.uploaded_file and st.button("πŸ“€ Upload File"):
103
  with st.spinner("πŸ”„ Uploading and processing..."):
104
  create_agent_and_index(file_content, file_type)
105
 
106
- # 💬 Query Section
107
  if st.session_state.file_uploaded:
108
  output_format = st.selectbox("πŸ“‹ Select Output Format", ["Plain Text", "Markdown", "Tabular View"])
109
  query = st.text_area("πŸ’¬ Ask a question about your uploaded file")
@@ -138,5 +136,5 @@ if st.session_state.file_uploaded:
138
  df = pd.DataFrame(rows[1:], columns=rows[0])
139
  st.dataframe(df)
140
  except Exception:
141
- st.warning("⚠️ Could not render a table. Showing raw output instead.")
142
  st.text(response)
 
1
+ # Hugging Face-compatible environment fixes
2
  import os
3
  os.environ["STREAMLIT_HOME"] = "/tmp"
4
  os.environ["XDG_CONFIG_HOME"] = "/tmp"
5
  os.environ["XDG_DATA_HOME"] = "/tmp"
6
 
7
+ # Fix async loop crash in HF Spaces
8
+ import asyncio
9
+ try:
10
+ asyncio.get_running_loop()
11
+ except RuntimeError:
12
+ asyncio.set_event_loop(asyncio.new_event_loop())
13
+
14
  import streamlit as st
15
  import pandas as pd
16
  import json
17
  import io
18
 
19
+ from langchain_openai import OpenAIEmbeddings, OpenAI
20
+ from langchain_community.vectorstores import FAISS
21
  from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
22
  from langchain.text_splitter import CharacterTextSplitter
 
 
23
  from langchain.chains import RetrievalQA
 
 
24
 
25
  import PyPDF2
26
  from docx import Document
27
 
28
+ # Load API key securely
29
+ API_KEY = st.secrets["OPENAI_API_KEY"]
30
+ embeddings_model = OpenAIEmbeddings(openai_api_key=API_KEY)
 
 
 
 
 
31
 
32
+ # Streamlit settings
33
  st.set_page_config(page_title="RAG File Chat", layout="centered")
34
  st.title("🧠 Chat with Your Uploaded File")
35
 
36
+ # Session state init
37
  if "uploaded_file" not in st.session_state:
38
  st.session_state.uploaded_file = None
39
  if "file_uploaded" not in st.session_state:
 
78
  elif file_type in ["pdf", "docx"]:
79
  text = extract_text_from_file(file_content, file_type)
80
  st.success(f"πŸ“ƒ Extracted text from {file_type.upper()}.")
81
+ splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
82
+ chunks = splitter.split_text(text)
83
+ st.session_state.vectorstore = FAISS.from_texts(chunks, embeddings_model)
84
  st.success("🧠 Embedded text and stored in FAISS.")
85
  else:
86
  st.error("❌ Unsupported file type.")
 
89
  st.session_state.file_type = file_type
90
 
91
 
92
+ # 📁 Upload UI
93
  uploaded = st.file_uploader("πŸ“ Browse and select a file", type=["csv", "xlsx", "json", "pdf", "docx"])
94
  if uploaded:
95
  st.session_state.uploaded_file = uploaded
 
101
  with st.spinner("πŸ”„ Uploading and processing..."):
102
  create_agent_and_index(file_content, file_type)
103
 
104
+ # 💬 Query UI
105
  if st.session_state.file_uploaded:
106
  output_format = st.selectbox("πŸ“‹ Select Output Format", ["Plain Text", "Markdown", "Tabular View"])
107
  query = st.text_area("πŸ’¬ Ask a question about your uploaded file")
 
136
  df = pd.DataFrame(rows[1:], columns=rows[0])
137
  st.dataframe(df)
138
  except Exception:
139
+ st.warning("⚠️ Could not render table. Showing raw text.")
140
  st.text(response)