PRSHNTKUMR committed on
Commit
9507dac
·
verified ·
1 Parent(s): 88a3a29

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +33 -48
src/streamlit_app.py CHANGED
@@ -1,8 +1,11 @@
1
- # Hugging Face-compatible environment fixes
 
 
2
  import os
3
  os.environ["STREAMLIT_HOME"] = "/tmp"
4
  os.environ["XDG_CONFIG_HOME"] = "/tmp"
5
  os.environ["XDG_DATA_HOME"] = "/tmp"
 
6
 
7
  import asyncio
8
  try:
@@ -10,45 +13,35 @@ try:
10
  except RuntimeError:
11
  asyncio.set_event_loop(asyncio.new_event_loop())
12
 
13
-
14
- from dotenv import load_dotenv, find_dotenv
15
- import os
16
  import streamlit as st
17
  import pandas as pd
18
  import json
19
  import io
20
-
21
  from langchain_openai import OpenAIEmbeddings, OpenAI
22
  from langchain_community.vectorstores import FAISS
23
  from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
24
  from langchain.text_splitter import CharacterTextSplitter
25
  from langchain.chains import RetrievalQA
26
- import streamlit.runtime.metrics_util
27
- streamlit.runtime.metrics_util._get_machine_id_v4 = lambda: "HUGGINGFACE_PATCH"
28
-
29
  import PyPDF2
30
  from docx import Document
31
 
32
- # Load local .env if it exists
33
  _ = load_dotenv(find_dotenv())
34
-
35
- # Try Hugging Face secret first, then fallback to environment
36
  API_KEY = st.secrets.get("OPENAI_API_KEY", os.getenv("OPENAI_API_KEY"))
37
 
38
  if not API_KEY:
39
- st.error("❌ OPENAI_API_KEY not found. Please set it in Hugging Face β†’ Settings β†’ Secrets.")
40
  st.stop()
41
 
42
- # Get OpenAI API key from Hugging Face secrets
43
-
44
-
45
  embeddings_model = OpenAIEmbeddings(openai_api_key=API_KEY)
46
 
47
- # Streamlit settings
48
  st.set_page_config(page_title="RAG File Chat", layout="centered")
49
- st.title("🧠 Chat with Your Uploaded File")
50
 
51
- # Session state init
52
  if "uploaded_file" not in st.session_state:
53
  st.session_state.uploaded_file = None
54
  if "file_uploaded" not in st.session_state:
@@ -60,7 +53,7 @@ if "agent" not in st.session_state:
60
  if "file_type" not in st.session_state:
61
  st.session_state.file_type = None
62
 
63
-
64
  def extract_text_from_file(file_content, file_type):
65
  if file_type == "pdf":
66
  reader = PyPDF2.PdfReader(io.BytesIO(file_content))
@@ -70,62 +63,55 @@ def extract_text_from_file(file_content, file_type):
70
  return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
71
  return ""
72
 
73
-
74
  def create_agent_and_index(file_content, file_type):
75
  if file_type == "csv":
76
  df = pd.read_csv(io.StringIO(file_content.decode("utf-8")))
77
- st.success("πŸ“„ CSV file loaded.")
78
  llm = OpenAI(openai_api_key=API_KEY)
79
  st.session_state.agent = create_pandas_dataframe_agent(llm, df, verbose=False)
80
- st.success("πŸ€– Agent created for tabular data.")
81
  elif file_type == "xlsx":
82
  df = pd.read_excel(file_content)
83
- st.success("πŸ“„ Excel file loaded.")
84
  llm = OpenAI(openai_api_key=API_KEY)
85
  st.session_state.agent = create_pandas_dataframe_agent(llm, df, verbose=False)
86
- st.success("πŸ€– Agent created for tabular data.")
87
  elif file_type == "json":
88
  df = pd.DataFrame(json.loads(file_content.decode("utf-8")))
89
- st.success("πŸ“„ JSON file loaded.")
90
  llm = OpenAI(openai_api_key=API_KEY)
91
  st.session_state.agent = create_pandas_dataframe_agent(llm, df, verbose=False)
92
- st.success("πŸ€– Agent created for tabular data.")
93
  elif file_type in ["pdf", "docx"]:
94
  text = extract_text_from_file(file_content, file_type)
95
- st.success(f"πŸ“ƒ Extracted text from {file_type.upper()}.")
96
- splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
97
- chunks = splitter.split_text(text)
98
  st.session_state.vectorstore = FAISS.from_texts(chunks, embeddings_model)
99
- st.success("🧠 Embedded text and stored in FAISS.")
100
  else:
101
- st.error("❌ Unsupported file type.")
102
  return
103
  st.session_state.file_uploaded = True
104
  st.session_state.file_type = file_type
105
 
106
-
107
- # πŸ“ Upload UI
108
- uploaded = st.file_uploader("πŸ“ Browse and select a file", type=["csv", "xlsx", "json", "pdf", "docx"])
109
  if uploaded:
110
  st.session_state.uploaded_file = uploaded
111
- st.info(f"βœ… File selected: `{uploaded.name}` ({uploaded.size / 1024:.1f} KB)")
112
 
113
- if st.session_state.uploaded_file and st.button("πŸ“€ Upload File"):
114
- file_content = st.session_state.uploaded_file.read()
115
- file_type = st.session_state.uploaded_file.name.split(".")[-1]
116
- with st.spinner("πŸ”„ Uploading and processing..."):
117
- create_agent_and_index(file_content, file_type)
118
 
119
- # πŸ’¬ Query UI
120
  if st.session_state.file_uploaded:
121
- output_format = st.selectbox("πŸ“‹ Select Output Format", ["Plain Text", "Markdown", "Tabular View"])
122
- query = st.text_area("πŸ’¬ Ask a question about your uploaded file")
123
 
124
  if st.button("Submit Query"):
125
  if not query.strip():
126
- st.warning("⚠️ Please enter a valid question.")
127
  else:
128
- with st.spinner("πŸ’‘ Thinking..."):
129
  if st.session_state.file_type in ["pdf", "docx"]:
130
  qa_chain = RetrievalQA.from_chain_type(
131
  llm=OpenAI(openai_api_key=API_KEY),
@@ -137,8 +123,7 @@ if st.session_state.file_uploaded:
137
  else:
138
  response = st.session_state.agent.run(query)
139
 
140
- st.subheader("πŸ“Œ Answer")
141
-
142
  if output_format == "Plain Text":
143
  st.text(response)
144
  elif output_format == "Markdown":
@@ -151,5 +136,5 @@ if st.session_state.file_uploaded:
151
  df = pd.DataFrame(rows[1:], columns=rows[0])
152
  st.dataframe(df)
153
  except Exception:
154
- st.warning("⚠️ Could not render table. Showing raw text.")
155
  st.text(response)
 
1
+ # ✅ Clean and Final Streamlit RAG App (Hugging Face + Local Ready)
2
+
3
+ # --- Environment Setup (Safe for Hugging Face) ---
4
  import os
5
  os.environ["STREAMLIT_HOME"] = "/tmp"
6
  os.environ["XDG_CONFIG_HOME"] = "/tmp"
7
  os.environ["XDG_DATA_HOME"] = "/tmp"
8
+ os.environ["HOME"] = "/tmp"
9
 
10
  import asyncio
11
  try:
 
13
  except RuntimeError:
14
  asyncio.set_event_loop(asyncio.new_event_loop())
15
 
16
+ # --- Imports ---
 
 
17
  import streamlit as st
18
  import pandas as pd
19
  import json
20
  import io
21
+ from dotenv import load_dotenv, find_dotenv
22
  from langchain_openai import OpenAIEmbeddings, OpenAI
23
  from langchain_community.vectorstores import FAISS
24
  from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
25
  from langchain.text_splitter import CharacterTextSplitter
26
  from langchain.chains import RetrievalQA
 
 
 
27
  import PyPDF2
28
  from docx import Document
29
 
30
+ # --- Load API Key Securely ---
31
  _ = load_dotenv(find_dotenv())
 
 
32
  API_KEY = st.secrets.get("OPENAI_API_KEY", os.getenv("OPENAI_API_KEY"))
33
 
34
  if not API_KEY:
35
+ st.error("\u274c `OPENAI_API_KEY` is missing.\n\nGo to Hugging Face Settings β†’ Secrets and add it, or use a `.env` file locally.")
36
  st.stop()
37
 
 
 
 
38
  embeddings_model = OpenAIEmbeddings(openai_api_key=API_KEY)
39
 
40
+ # --- Streamlit Page Setup ---
41
  st.set_page_config(page_title="RAG File Chat", layout="centered")
42
+ st.title("\ud83e\udee0 Chat with Your Uploaded File")
43
 
44
+ # --- Session State ---
45
  if "uploaded_file" not in st.session_state:
46
  st.session_state.uploaded_file = None
47
  if "file_uploaded" not in st.session_state:
 
53
  if "file_type" not in st.session_state:
54
  st.session_state.file_type = None
55
 
56
+ # --- File Parsing Functions ---
57
  def extract_text_from_file(file_content, file_type):
58
  if file_type == "pdf":
59
  reader = PyPDF2.PdfReader(io.BytesIO(file_content))
 
63
  return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
64
  return ""
65
 
 
66
  def create_agent_and_index(file_content, file_type):
67
  if file_type == "csv":
68
  df = pd.read_csv(io.StringIO(file_content.decode("utf-8")))
 
69
  llm = OpenAI(openai_api_key=API_KEY)
70
  st.session_state.agent = create_pandas_dataframe_agent(llm, df, verbose=False)
71
+ st.success("\ud83e\udd16 Agent created for CSV.")
72
  elif file_type == "xlsx":
73
  df = pd.read_excel(file_content)
 
74
  llm = OpenAI(openai_api_key=API_KEY)
75
  st.session_state.agent = create_pandas_dataframe_agent(llm, df, verbose=False)
76
+ st.success("\ud83e\udd16 Agent created for Excel.")
77
  elif file_type == "json":
78
  df = pd.DataFrame(json.loads(file_content.decode("utf-8")))
 
79
  llm = OpenAI(openai_api_key=API_KEY)
80
  st.session_state.agent = create_pandas_dataframe_agent(llm, df, verbose=False)
81
+ st.success("\ud83e\udd16 Agent created for JSON.")
82
  elif file_type in ["pdf", "docx"]:
83
  text = extract_text_from_file(file_content, file_type)
84
+ chunks = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_text(text)
 
 
85
  st.session_state.vectorstore = FAISS.from_texts(chunks, embeddings_model)
86
+ st.success("\ud83d\udcca Text embedded into FAISS vectorstore.")
87
  else:
88
+ st.error("\u274c Unsupported file type.")
89
  return
90
  st.session_state.file_uploaded = True
91
  st.session_state.file_type = file_type
92
 
93
+ # --- File Upload UI ---
94
# File picker restricted to the formats create_agent_and_index can handle.
# Emoji are written as single \U escapes: JSON-style "\ud83d\udcc1" pairs are
# lone surrogates in a Python str and cannot be encoded as UTF-8.
uploaded = st.file_uploader("\U0001F4C1 Browse and select a file", type=["csv", "xlsx", "json", "pdf", "docx"])

if uploaded:
    # Keep the selection in session state so it is still available on the
    # rerun triggered by the button below.
    st.session_state.uploaded_file = uploaded
    st.info(f"\u2705 File selected: `{uploaded.name}` ({uploaded.size / 1024:.1f} KB)")

if st.session_state.uploaded_file and st.button("\U0001F4E4 Upload File"):
    # getvalue() returns the full buffer regardless of the stream position,
    # whereas read() yields b"" once the file has already been consumed.
    content = st.session_state.uploaded_file.getvalue()
    # Extension (lower-cased) selects the processing branch.
    ftype = st.session_state.uploaded_file.name.split(".")[-1].lower()
    with st.spinner("\U0001F504 Processing file..."):
        create_agent_and_index(content, ftype)
104
 
105
+ # --- Query UI ---
106
  if st.session_state.file_uploaded:
107
+ output_format = st.selectbox("\ud83d\udcca Select Output Format", ["Plain Text", "Markdown", "Tabular View"])
108
+ query = st.text_area("\ud83d\udd0d Ask a question about your uploaded file")
109
 
110
  if st.button("Submit Query"):
111
  if not query.strip():
112
+ st.warning("\u26a0\ufe0f Please enter a valid question.")
113
  else:
114
+ with st.spinner("\ud83d\udca1 Thinking..."):
115
  if st.session_state.file_type in ["pdf", "docx"]:
116
  qa_chain = RetrievalQA.from_chain_type(
117
  llm=OpenAI(openai_api_key=API_KEY),
 
123
  else:
124
  response = st.session_state.agent.run(query)
125
 
126
+ st.subheader("\ud83d\udccc Answer")
 
127
  if output_format == "Plain Text":
128
  st.text(response)
129
  elif output_format == "Markdown":
 
136
  df = pd.DataFrame(rows[1:], columns=rows[0])
137
  st.dataframe(df)
138
  except Exception:
139
+ st.warning("\u26a0\ufe0f Could not render table. Showing raw text.")
140
  st.text(response)