PRSHNTKUMR committed on
Commit
8b71299
·
verified ·
1 Parent(s): c18b184

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +80 -55
src/streamlit_app.py CHANGED
@@ -1,4 +1,4 @@
1
- # 🫠 Clean and Final Streamlit RAG App (Hugging Face + Local Ready)
2
 
3
  # --- Environment Setup (Safe for Hugging Face) ---
4
  import os
@@ -40,54 +40,72 @@ embeddings_model = OpenAIEmbeddings(openai_api_key=API_KEY)
40
  st.set_page_config(page_title="RAG File Chat", layout="centered")
41
  st.title("🧠 Chat with Your Uploaded File")
42
 
43
- # --- Session State ---
44
- if "uploaded_file" not in st.session_state:
45
- st.session_state.uploaded_file = None
46
- if "file_uploaded" not in st.session_state:
47
- st.session_state.file_uploaded = False
48
- if "vectorstore" not in st.session_state:
49
- st.session_state.vectorstore = None
50
- if "agent" not in st.session_state:
51
- st.session_state.agent = None
52
- if "file_type" not in st.session_state:
53
- st.session_state.file_type = None
54
-
55
- # --- File Parsing Functions ---
56
- def extract_text_from_file(file_content, file_type):
57
- if file_type == "pdf":
58
- reader = PyPDF2.PdfReader(io.BytesIO(file_content))
59
- return "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
60
- elif file_type == "docx":
61
- doc = Document(io.BytesIO(file_content))
62
- return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
63
- return ""
64
-
65
- def create_agent_and_index(file_content, file_type):
66
  if file_type == "csv":
67
  df = pd.read_csv(io.StringIO(file_content.decode("utf-8")))
68
  llm = OpenAI(openai_api_key=API_KEY)
69
- st.session_state.agent = create_pandas_dataframe_agent(llm, df, verbose=False)
70
- st.success("🤖 Agent created for CSV.")
71
  elif file_type == "xlsx":
72
  df = pd.read_excel(file_content)
73
  llm = OpenAI(openai_api_key=API_KEY)
74
- st.session_state.agent = create_pandas_dataframe_agent(llm, df, verbose=False)
75
- st.success("🤖 Agent created for Excel.")
76
  elif file_type == "json":
77
  df = pd.DataFrame(json.loads(file_content.decode("utf-8")))
78
  llm = OpenAI(openai_api_key=API_KEY)
79
- st.session_state.agent = create_pandas_dataframe_agent(llm, df, verbose=False)
80
- st.success("🤖 Agent created for JSON.")
81
  elif file_type in ["pdf", "docx"]:
82
  text = extract_text_from_file(file_content, file_type)
83
  chunks = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_text(text)
84
- st.session_state.vectorstore = FAISS.from_texts(chunks, embeddings_model)
85
- st.success("📊 Text embedded into FAISS vectorstore.")
86
  else:
87
  st.error("❌ Unsupported file type.")
88
- return
89
- st.session_state.file_uploaded = True
90
- st.session_state.file_type = file_type
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
  # --- File Upload UI ---
93
  MAX_SIZE_MB = 50
@@ -106,8 +124,16 @@ if st.session_state.uploaded_file and st.button("📤 Upload File"):
106
  try:
107
  content = st.session_state.uploaded_file.read()
108
  ftype = st.session_state.uploaded_file.name.split(".")[-1].lower()
109
- with st.spinner("🔄 Processing file..."):
110
- create_agent_and_index(content, ftype)
 
 
 
 
 
 
 
 
111
  except Exception as e:
112
  st.error("❌ Upload failed. Try a smaller file or check connection.")
113
  st.exception(e)
@@ -121,30 +147,29 @@ if st.session_state.file_uploaded:
121
  if not query.strip():
122
  st.warning("⚠️ Please enter a valid question.")
123
  else:
124
- with st.spinner("💡 Thinking..."):
125
- if st.session_state.file_type in ["pdf", "docx"]:
126
- qa_chain = RetrievalQA.from_chain_type(
127
- llm=OpenAI(openai_api_key=API_KEY),
128
- chain_type="stuff",
129
- retriever=st.session_state.vectorstore.as_retriever(search_kwargs={"k": 5}),
130
- )
131
- result = qa_chain({"query": query})
132
- response = result["result"]
133
- else:
134
- response = st.session_state.agent.run(query)
135
-
136
- st.subheader("📌 Answer")
137
  if output_format == "Plain Text":
138
- st.text(response)
139
  elif output_format == "Markdown":
140
- st.markdown(response)
141
  elif output_format == "Tabular View":
142
- rows = [line.split("\t") for line in response.split("\n") if "\t" in line]
143
  if not rows or len(rows[0]) == 1:
144
- rows = [line.split(",") for line in response.split("\n") if "," in line]
145
  try:
146
  df = pd.DataFrame(rows[1:], columns=rows[0])
147
  st.dataframe(df)
148
  except Exception:
149
  st.warning("⚠️ Could not render table. Showing raw text.")
150
- st.text(response)
 
1
+ # 🫠 Clean and Final Streamlit RAG App (Three-Agent Architecture)
2
 
3
  # --- Environment Setup (Safe for Hugging Face) ---
4
  import os
 
40
  st.set_page_config(page_title="RAG File Chat", layout="centered")
41
  st.title("🧠 Chat with Your Uploaded File")
42
 
43
+ # --- Agent 1: File Ingestion and Indexing ---
44
+ def agent_file_loader(file_content, file_type):
45
+ st.info("📂 Agent 1: Loading and indexing your file...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  if file_type == "csv":
47
  df = pd.read_csv(io.StringIO(file_content.decode("utf-8")))
48
  llm = OpenAI(openai_api_key=API_KEY)
49
+ return create_pandas_dataframe_agent(llm, df, verbose=False), None
 
50
  elif file_type == "xlsx":
51
  df = pd.read_excel(file_content)
52
  llm = OpenAI(openai_api_key=API_KEY)
53
+ return create_pandas_dataframe_agent(llm, df, verbose=False), None
 
54
  elif file_type == "json":
55
  df = pd.DataFrame(json.loads(file_content.decode("utf-8")))
56
  llm = OpenAI(openai_api_key=API_KEY)
57
+ return create_pandas_dataframe_agent(llm, df, verbose=False), None
 
58
  elif file_type in ["pdf", "docx"]:
59
  text = extract_text_from_file(file_content, file_type)
60
  chunks = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_text(text)
61
+ vectorstore = FAISS.from_texts(chunks, embeddings_model)
62
+ return None, vectorstore
63
  else:
64
  st.error("❌ Unsupported file type.")
65
+ return None, None
66
+
67
+ # --- Agent 2: Query Resolution ---
68
+ def agent_query_executor(query, file_type, df_agent=None, vectorstore=None):
69
+ st.info("🧠 Agent 2: Processing your question...")
70
+ if file_type in ["pdf", "docx"]:
71
+ qa_chain = RetrievalQA.from_chain_type(
72
+ llm=OpenAI(openai_api_key=API_KEY),
73
+ chain_type="stuff",
74
+ retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
75
+ )
76
+ result = qa_chain({"query": query})
77
+ return result["result"]
78
+ else:
79
+ return df_agent.run(query)
80
+
81
+ # --- Agent 3: Response Enhancement ---
82
+ def agent_response_enhancer(response):
83
+ st.info("🔍 Agent 3: Reviewing and enhancing the response...")
84
+ enhancement_prompt = f"Improve the clarity and format of the following response:\n{response}"
85
+ llm = OpenAI(openai_api_key=API_KEY)
86
+ return llm.invoke(enhancement_prompt)
87
+
88
+ # --- Helper Function for Text Extraction ---
89
+ def extract_text_from_file(file_content, file_type):
90
+ if file_type == "pdf":
91
+ reader = PyPDF2.PdfReader(io.BytesIO(file_content))
92
+ return "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
93
+ elif file_type == "docx":
94
+ doc = Document(io.BytesIO(file_content))
95
+ return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
96
+ return ""
97
+
98
+ # --- Session State ---
99
+ if "uploaded_file" not in st.session_state:
100
+ st.session_state.uploaded_file = None
101
+ if "file_uploaded" not in st.session_state:
102
+ st.session_state.file_uploaded = False
103
+ if "vectorstore" not in st.session_state:
104
+ st.session_state.vectorstore = None
105
+ if "agent" not in st.session_state:
106
+ st.session_state.agent = None
107
+ if "file_type" not in st.session_state:
108
+ st.session_state.file_type = None
109
 
110
  # --- File Upload UI ---
111
  MAX_SIZE_MB = 50
 
124
  try:
125
  content = st.session_state.uploaded_file.read()
126
  ftype = st.session_state.uploaded_file.name.split(".")[-1].lower()
127
+ with st.spinner("🔄 Processing file with Agent 1..."):
128
+ agent, vectorstore = agent_file_loader(content, ftype)
129
+ if agent or vectorstore:
130
+ st.session_state.agent = agent
131
+ st.session_state.vectorstore = vectorstore
132
+ st.session_state.file_uploaded = True
133
+ st.session_state.file_type = ftype
134
+ st.success("✅ File processed successfully.")
135
+ else:
136
+ st.error("⚠️ Failed to process file.")
137
  except Exception as e:
138
  st.error("❌ Upload failed. Try a smaller file or check connection.")
139
  st.exception(e)
 
147
  if not query.strip():
148
  st.warning("⚠️ Please enter a valid question.")
149
  else:
150
+ with st.spinner("💡 Sending query to Agent 2..."):
151
+ raw_response = agent_query_executor(
152
+ query,
153
+ st.session_state.file_type,
154
+ df_agent=st.session_state.agent,
155
+ vectorstore=st.session_state.vectorstore,
156
+ )
157
+
158
+ with st.spinner("✨ Enhancing response with Agent 3..."):
159
+ enhanced_response = agent_response_enhancer(raw_response)
160
+
161
+ st.subheader("📌 Final Answer")
 
162
  if output_format == "Plain Text":
163
+ st.text(enhanced_response)
164
  elif output_format == "Markdown":
165
+ st.markdown(enhanced_response)
166
  elif output_format == "Tabular View":
167
+ rows = [line.split("\t") for line in enhanced_response.split("\n") if "\t" in line]
168
  if not rows or len(rows[0]) == 1:
169
+ rows = [line.split(",") for line in enhanced_response.split("\n") if "," in line]
170
  try:
171
  df = pd.DataFrame(rows[1:], columns=rows[0])
172
  st.dataframe(df)
173
  except Exception:
174
  st.warning("⚠️ Could not render table. Showing raw text.")
175
+ st.text(enhanced_response)