Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -45,8 +45,7 @@ sidebar_profiles()
|
|
| 45 |
def get_api_key():
|
| 46 |
api_key = os.getenv("GROQ_API_KEY")
|
| 47 |
if not api_key:
|
| 48 |
-
|
| 49 |
-
api_key = st.session_state["GROQ_API_KEY"]
|
| 50 |
return api_key
|
| 51 |
|
| 52 |
# Session state initialization
|
|
@@ -126,13 +125,10 @@ def retrieve_chunks(query, top_k=5):
|
|
| 126 |
def build_prompt(system_prompt, context_chunks, question):
|
| 127 |
context = "\n\n".join(context_chunks)
|
| 128 |
return f"""{system_prompt}
|
| 129 |
-
|
| 130 |
Context:
|
| 131 |
{context}
|
| 132 |
-
|
| 133 |
Question:
|
| 134 |
{question}
|
| 135 |
-
|
| 136 |
Answer: Please provide a comprehensive answer based only on the context provided."""
|
| 137 |
|
| 138 |
def generate_answer(prompt):
|
|
@@ -213,19 +209,7 @@ st.title("📄 RAG Explorer: AI-Powered Document Assistant")
|
|
| 213 |
st.markdown("Upload a document and ask questions to get AI-powered answers with translation capabilities.")
|
| 214 |
|
| 215 |
# Add API key input in sidebar
|
| 216 |
-
with st.sidebar:
|
| 217 |
-
st.header("API Configuration")
|
| 218 |
-
api_key_input = st.text_input(
|
| 219 |
-
"Groq API Key",
|
| 220 |
-
value=get_api_key() or "",
|
| 221 |
-
type="password",
|
| 222 |
-
help="Enter your Groq API key here if not set as environment variable"
|
| 223 |
-
)
|
| 224 |
-
|
| 225 |
-
if api_key_input:
|
| 226 |
-
st.session_state["GROQ_API_KEY"] = api_key_input
|
| 227 |
-
st.success("API key saved for this session!")
|
| 228 |
-
|
| 229 |
# Add model selection
|
| 230 |
st.subheader("Model Selection")
|
| 231 |
model_choice = st.selectbox(
|
|
@@ -244,58 +228,14 @@ with st.sidebar:
|
|
| 244 |
st.session_state.debug_mode = st.checkbox("Show Debug Information", value=st.session_state.debug_mode)
|
| 245 |
|
| 246 |
if st.session_state.last_query_time:
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
st.success(f"Indexed {chunk_count} text chunks from the uploaded PDF.")
|
| 255 |
-
|
| 256 |
-
# User query input
|
| 257 |
-
question = st.text_input("Ask a question based on the document")
|
| 258 |
-
|
| 259 |
-
if question:
|
| 260 |
-
context_chunks = retrieve_chunks(question)
|
| 261 |
-
system_prompt = "You are a helpful assistant answering only from the document context provided."
|
| 262 |
-
prompt = build_prompt(system_prompt, context_chunks, question)
|
| 263 |
-
answer = generate_answer(prompt)
|
| 264 |
-
|
| 265 |
-
# Show response
|
| 266 |
-
st.markdown("### 💬 AI Answer")
|
| 267 |
-
st.write(answer)
|
| 268 |
|
| 269 |
-
# Optional: Translate
|
| 270 |
-
translate_option = st.selectbox("Translate the answer to", ["None", "Urdu", "Hindi", "Spanish", "French"])
|
| 271 |
-
language_codes = {
|
| 272 |
-
"Urdu": "ur",
|
| 273 |
-
"Hindi": "hi",
|
| 274 |
-
"Spanish": "es",
|
| 275 |
-
"French": "fr"
|
| 276 |
-
}
|
| 277 |
-
|
| 278 |
-
if translate_option != "None":
|
| 279 |
-
translated_text = GoogleTranslator(source='auto', target='fr').translate(text)
|
| 280 |
-
st.markdown(f"### 🌐 Translated Answer ({translate_option})")
|
| 281 |
-
st.write(translated_text)
|
| 282 |
-
|
| 283 |
-
# Audio playback
|
| 284 |
-
audio_file_path = text_to_speech(translated_text, language_codes.get(translate_option, "en"))
|
| 285 |
-
if audio_file_path:
|
| 286 |
-
st.audio(audio_file_path, format="audio/mp3")
|
| 287 |
-
|
| 288 |
-
elif uploaded_file is None:
|
| 289 |
-
st.info("Please upload a PDF document to begin.")
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
st.subheader("About")
|
| 293 |
-
st.markdown("""
|
| 294 |
-
This app uses Retrieval-Augmented Generation (RAG) to answer questions about uploaded documents.
|
| 295 |
-
1. Upload a document
|
| 296 |
-
2. Ask a question
|
| 297 |
-
3. Optionally translate responses to other languages
|
| 298 |
-
""")
|
| 299 |
|
| 300 |
# Main content area
|
| 301 |
col1, col2 = st.columns([2, 1])
|
|
@@ -314,13 +254,41 @@ with col1:
|
|
| 314 |
st.success(f"Document indexed successfully! Created {total_chunks} chunks.")
|
| 315 |
|
| 316 |
# Display document preview
|
| 317 |
-
with st.expander("Document Preview"):
|
| 318 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
|
| 320 |
with col2:
|
| 321 |
if st.session_state.chunks:
|
| 322 |
st.info(f"Document chunks: {len(st.session_state.chunks)}")
|
| 323 |
|
|
|
|
| 324 |
# Query and answer section
|
| 325 |
st.divider()
|
| 326 |
query = st.text_input("Ask a question about the document")
|
|
@@ -340,7 +308,11 @@ with col2:
|
|
| 340 |
}
|
| 341 |
lang_code = language_codes[language]
|
| 342 |
|
| 343 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
if index.ntotal == 0:
|
| 345 |
st.warning("Please upload and index a document first.")
|
| 346 |
else:
|
|
|
|
| 45 |
def get_api_key():
|
| 46 |
api_key = os.getenv("GROQ_API_KEY")
|
| 47 |
if not api_key:
|
| 48 |
+
st.error("GROQ_API_KEY environment variable is not set. Please set it before running the application.")
|
|
|
|
| 49 |
return api_key
|
| 50 |
|
| 51 |
# Session state initialization
|
|
|
|
| 125 |
def build_prompt(system_prompt, context_chunks, question):
|
| 126 |
context = "\n\n".join(context_chunks)
|
| 127 |
return f"""{system_prompt}
|
|
|
|
| 128 |
Context:
|
| 129 |
{context}
|
|
|
|
| 130 |
Question:
|
| 131 |
{question}
|
|
|
|
| 132 |
Answer: Please provide a comprehensive answer based only on the context provided."""
|
| 133 |
|
| 134 |
def generate_answer(prompt):
|
|
|
|
| 209 |
st.markdown("Upload a document and ask questions to get AI-powered answers with translation capabilities.")
|
| 210 |
|
| 211 |
# Add API key input in sidebar
|
| 212 |
+
with st.sidebar:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
# Add model selection
|
| 214 |
st.subheader("Model Selection")
|
| 215 |
model_choice = st.selectbox(
|
|
|
|
| 228 |
st.session_state.debug_mode = st.checkbox("Show Debug Information", value=st.session_state.debug_mode)
|
| 229 |
|
| 230 |
if st.session_state.last_query_time:
|
| 231 |
+
st.subheader("About")
|
| 232 |
+
st.markdown("""
|
| 233 |
+
This app uses Retrieval-Augmented Generation (RAG) to answer questions about uploaded documents.
|
| 234 |
+
1. Upload a document
|
| 235 |
+
2. Ask a question
|
| 236 |
+
3. Translate responses to other languages
|
| 237 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
# Main content area
|
| 241 |
col1, col2 = st.columns([2, 1])
|
|
|
|
| 254 |
st.success(f"Document indexed successfully! Created {total_chunks} chunks.")
|
| 255 |
|
| 256 |
# Display document preview
|
| 257 |
+
with st.expander("Document Preview"):
|
| 258 |
+
# Extract and display key points
|
| 259 |
+
st.subheader("Key Points")
|
| 260 |
+
|
| 261 |
+
# Simple algorithm to extract potential key points (sentences that might be important)
|
| 262 |
+
sentences = raw_text.split('. ')
|
| 263 |
+
key_points = []
|
| 264 |
+
|
| 265 |
+
# Look for sentences that might be key points (contains keywords, not too long/short)
|
| 266 |
+
for sentence in sentences[:50]: # Check first 50 sentences
|
| 267 |
+
sentence = sentence.strip()
|
| 268 |
+
if len(sentence) > 15 and len(sentence) < 200: # Reasonable length for a key point
|
| 269 |
+
# Keywords that might indicate important information
|
| 270 |
+
important_keywords = ["important", "key", "significant", "main", "primary", "essential",
|
| 271 |
+
"critical", "crucial", "fundamental", "major", "summary", "conclusion"]
|
| 272 |
+
|
| 273 |
+
if any(keyword in sentence.lower() for keyword in important_keywords) or sentence.endswith(':'):
|
| 274 |
+
key_points.append(sentence)
|
| 275 |
+
|
| 276 |
+
# If we didn't find obvious key points, just take some representative sentences
|
| 277 |
+
if len(key_points) < 3:
|
| 278 |
+
key_points = [s.strip() for s in sentences[:50:10] if len(s.strip()) > 15][:5] # Every 10th sentence from first 50
|
| 279 |
+
|
| 280 |
+
# Display the key points as bullets
|
| 281 |
+
for point in key_points[:5]: # Show up to 5 key points
|
| 282 |
+
st.markdown(f"• {point}")
|
| 283 |
+
|
| 284 |
+
if not key_points:
|
| 285 |
+
st.info("No clear key points detected. Try exploring the full document.")
|
| 286 |
|
| 287 |
with col2:
|
| 288 |
if st.session_state.chunks:
|
| 289 |
st.info(f"Document chunks: {len(st.session_state.chunks)}")
|
| 290 |
|
| 291 |
+
# Query and answer section
|
| 292 |
# Query and answer section
|
| 293 |
st.divider()
|
| 294 |
query = st.text_input("Ask a question about the document")
|
|
|
|
| 308 |
}
|
| 309 |
lang_code = language_codes[language]
|
| 310 |
|
| 311 |
+
# Add a submit button
|
| 312 |
+
submit_button = st.button("Get Answer", type="primary", key="submit_query")
|
| 313 |
+
|
| 314 |
+
# Only process when the button is clicked and there's a query
|
| 315 |
+
if submit_button and query:
|
| 316 |
if index.ntotal == 0:
|
| 317 |
st.warning("Please upload and index a document first.")
|
| 318 |
else:
|