Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| PDF Chat Web Application - Hugging Face Spaces Version | |
| ====================================================== | |
| A Streamlit web app for chatting with PDF documents using OpenAI. | |
| Deployed on Hugging Face Spaces for public access. | |
| """ | |
| import streamlit as st | |
| import os | |
| import tempfile | |
| import PyPDF2 | |
| from io import BytesIO | |
| import requests | |
| import json | |
# Page configuration: browser-tab title and icon, wide layout, sidebar
# expanded on first load.
_PAGE_SETTINGS = {
    "page_title": "PDF Chat Assistant",
    "page_icon": "π",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
# Custom CSS for the modern dark theme. The stylesheet lives in a named
# constant so the injection call below stays a one-liner; the classes it
# defines (main-header, user-message, ai-message, ...) are the ones used by
# the HTML snippets rendered elsewhere in this file.
_THEME_CSS = """
<style>
.stApp {
background: linear-gradient(135deg, #1e1e1e 0%, #2d2d2d 100%);
color: #ffffff;
}
.main-header {
background: linear-gradient(135deg, #2d2d2d 0%, #3d3d3d 100%);
padding: 2rem;
border-radius: 15px;
text-align: center;
margin-bottom: 2rem;
box-shadow: 0 4px 6px rgba(0,0,0,0.3);
border: 1px solid #404040;
}
.feature-card {
background: linear-gradient(135deg, #2d2d2d 0%, #3d3d3d 100%);
padding: 1.5rem;
border-radius: 10px;
margin: 1rem 0;
border: 1px solid #404040;
box-shadow: 0 2px 4px rgba(0,0,0,0.2);
}
.user-message {
background: linear-gradient(135deg, #007bff 0%, #0056b3 100%);
color: white;
padding: 1rem;
border-radius: 15px;
margin: 0.5rem 0;
margin-left: 15%;
box-shadow: 0 2px 4px rgba(0,123,255,0.3);
}
.ai-message {
background: linear-gradient(135deg, #495057 0%, #343a40 100%);
color: white;
padding: 1rem;
border-radius: 15px;
margin: 0.5rem 0;
margin-right: 15%;
box-shadow: 0 2px 4px rgba(73,80,87,0.3);
}
.success-message {
background: linear-gradient(135deg, #28a745 0%, #20c997 100%);
color: white;
padding: 1rem;
border-radius: 10px;
margin: 1rem 0;
box-shadow: 0 2px 4px rgba(40,167,69,0.3);
}
.warning-message {
background: linear-gradient(135deg, #ffc107 0%, #fd7e14 100%);
color: #212529;
padding: 1rem;
border-radius: 10px;
margin: 1rem 0;
box-shadow: 0 2px 4px rgba(255,193,7,0.3);
}
.info-message {
background: linear-gradient(135deg, #17a2b8 0%, #20c997 100%);
color: white;
padding: 1rem;
border-radius: 10px;
margin: 1rem 0;
box-shadow: 0 2px 4px rgba(23,162,184,0.3);
}
.stTextInput > div > div > input {
background-color: #3d3d3d;
color: white;
border: 2px solid #555;
border-radius: 10px;
padding: 0.5rem;
}
.stTextInput > div > div > input:focus {
border-color: #007bff;
box-shadow: 0 0 0 0.2rem rgba(0,123,255,0.25);
}
.stButton > button {
background: linear-gradient(135deg, #007bff 0%, #0056b3 100%);
color: white;
border: none;
border-radius: 10px;
padding: 0.5rem 1rem;
font-weight: 600;
transition: all 0.3s ease;
}
.stButton > button:hover {
background: linear-gradient(135deg, #0056b3 0%, #004085 100%);
transform: translateY(-2px);
box-shadow: 0 4px 8px rgba(0,123,255,0.3);
}
.upload-area {
border: 2px dashed #007bff;
border-radius: 15px;
padding: 2rem;
text-align: center;
background: rgba(0,123,255,0.1);
margin: 1rem 0;
}
.stats-container {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 1rem;
margin: 1rem 0;
}
.stat-card {
background: linear-gradient(135deg, #343a40 0%, #495057 100%);
padding: 1rem;
border-radius: 10px;
text-align: center;
border: 1px solid #404040;
}
</style>
"""
st.markdown(_THEME_CSS, unsafe_allow_html=True)
class PDFChatBot:
    """Hold the text of one PDF and answer questions about it through OpenAI.

    Attributes:
        pdf_text: Text of the most recently extracted PDF ("" until one loads).
        conversation_history: ``{"question": ..., "answer": ...}`` dicts, one
            appended per successful OpenAI answer.
        pdf_pages: Page count of the loaded PDF.
        pdf_chars: Character count of ``pdf_text``.
    """

    def __init__(self):
        self.pdf_text = ""
        self.conversation_history = []
        self.pdf_pages = 0
        self.pdf_chars = 0

    def extract_pdf_text(self, pdf_file):
        """Extract text from a PDF using PyPDF2 and store it on the instance.

        Args:
            pdf_file: Object handed straight to ``PyPDF2.PdfReader`` (the app
                passes the file object returned by Streamlit's uploader).

        Returns:
            A ``(success, message)`` tuple. On failure the previously loaded
            document, if any, is left untouched.
        """
        try:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            pages = pdf_reader.pages
            page_count = len(pages)
            # One newline after every page. `or ''` is defensive: a page that
            # yields no text contributes only its newline instead of raising.
            text = "".join(f"{page.extract_text() or ''}\n" for page in pages)
        except Exception as e:
            # Boundary handler: any reader failure is reported to the UI as a
            # message rather than raised.
            return False, f"Error reading PDF: {str(e)}"

        if not text.strip():
            return False, "Could not extract text from PDF. The PDF might contain only images or be password protected."

        self.pdf_text = text
        self.pdf_pages = page_count
        self.pdf_chars = len(text)
        return True, f"Successfully extracted text from {page_count} pages ({len(text):,} characters)!"

    def ask_openai(self, question, api_key, context_limit=3000,
                   model="gpt-3.5-turbo", timeout=30):
        """Ask the OpenAI chat-completions API a question about the loaded PDF.

        Args:
            question: The user's question.
            api_key: OpenAI API key, sent as a bearer token.
            context_limit: Number of leading characters of ``pdf_text`` to
                include in the prompt (keeps the request within token
                limits). ``None`` sends the whole document.
            model: Chat model name sent in the request.
            timeout: Request timeout in seconds.

        Returns:
            The answer text on success; otherwise a human-readable error
            message. A successful answer is also appended to
            ``conversation_history``.
        """
        # Cheap local validation first: no point spending an API call on a
        # blank question or an empty document.
        if not question or not question.strip():
            return "Please enter a question."
        if not self.pdf_text.strip():
            return "No document text is loaded. Please upload a PDF first."

        context = (
            "Based on the following document content, please answer the question accurately and concisely.\n\n"
            f"Document:\n{self.pdf_text[:context_limit]}\n\n"
            f"Question: {question}"
        )
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }
        data = {
            "model": model,
            "messages": [
                {"role": "system", "content": "You are a helpful AI assistant that answers questions about documents. Be accurate, concise, and helpful. If you cannot find the answer in the document, say so clearly."},
                {"role": "user", "content": context},
            ],
            "max_tokens": 1000,
            "temperature": 0.1,
        }

        try:
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers=headers,
                json=data,
                timeout=timeout,
            )
            if response.status_code == 200:
                result = response.json()
                answer = result['choices'][0]['message']['content']
                # Store in conversation history
                self.conversation_history.append({"question": question, "answer": answer})
                return answer
            elif response.status_code == 401:
                return "β Invalid API key. Please check your OpenAI API key and try again."
            elif response.status_code == 429:
                return "β³ Rate limit exceeded. Please wait a moment and try again."
            else:
                return f"β API Error: {response.status_code} - Please check your API key and try again."
        except requests.exceptions.Timeout:
            return "β³ Request timed out. Please try again."
        except requests.exceptions.ConnectionError:
            return "π Connection error. Please check your internet connection."
        except Exception as e:
            # Also covers a malformed response body (missing keys, bad JSON).
            return f"β Error: {str(e)}"
# --- Static markup rendered by main() and its helpers -----------------------
# Kept at module level, flush-left, so the text handed to st.markdown carries
# no leading indentation.

_HEADER_HTML = """
<div class="main-header">
<h1>π PDF Chat Assistant</h1>
<p style="font-size: 1.2em; margin: 0.5rem 0;">Upload any PDF and start an intelligent conversation with your document!</p>
<p style="color: #aaa; margin-top: 1rem;">Powered by OpenAI GPT β’ Built with β€οΈ for Hugging Face Spaces</p>
</div>
"""

_HOW_TO_USE_MD = """
1. **π Enter your OpenAI API key** above
2. **π€ Upload a PDF file** using the uploader
3. **β³ Wait** for text extraction (few seconds)
4. **π¬ Ask questions** about your document
5. **π§ Get AI-powered answers** instantly!
"""

_FEATURES_MD = """
β’ π **PDF Text Extraction**
β’ π€ **AI-Powered Q&A**
β’ πΎ **Conversation Memory**
β’ π¨ **Beautiful Interface**
β’ π **Fast & Responsive**
β’ π **Privacy Focused**
"""

_API_KEY_HELP_MD = """
**How to get your API key:**
1. Go to [OpenAI Platform](https://platform.openai.com)
2. Sign up or log in to your account
3. Navigate to **API Keys** section
4. Click **"Create new secret key"**
5. Copy the key (starts with `sk-`)
6. Paste it in the field above
**Note:** Your API key is only used for this session and is not stored anywhere.
"""

_UPLOAD_AREA_HTML = """
<div class="upload-area">
<h3>π Drag & Drop Your PDF Here</h3>
<p>Or click "Browse files" above to select a PDF</p>
<br>
<p><small>π Supported: PDF files up to 200MB<br>
π Your files are processed securely and not stored</small></p>
</div>
"""

_API_KEY_REQUIRED_HTML = """
<div class="warning-message">
<h4>β οΈ API Key Required</h4>
<p>Please enter your OpenAI API key in the sidebar to process the PDF.</p>
</div>
"""

_GREETING_HTML = """
<div class="ai-message">
<strong>π€ AI:</strong> Hello! I've analyzed your PDF document. What would you like to know about it? Feel free to ask any questions!
</div>
"""

_WELCOME_HTML = """
<div class="feature-card" style="text-align: center; padding: 3rem;">
<h3>π Welcome to PDF Chat Assistant!</h3>
<p style="font-size: 1.1em; margin: 1rem 0;">Transform any PDF into an interactive conversation</p>
<br>
<div class="stats-container">
<div class="stat-card">
<h4>π Smart</h4>
<p>AI understands your documents</p>
</div>
<div class="stat-card">
<h4>β‘ Fast</h4>
<p>Instant answers to your questions</p>
</div>
<div class="stat-card">
<h4>π Secure</h4>
<p>Your data stays private</p>
</div>
</div>
<br>
<p><strong>Get started:</strong> Add your API key and upload a PDF!</p>
</div>
"""

_FOOTER_HTML = """
<div style="text-align: center; color: #888; padding: 2rem;">
<h4>π PDF Chat Assistant</h4>
<p>Made with β€οΈ using Streamlit β’ Powered by OpenAI GPT-3.5 β’ Hosted on π€ Hugging Face Spaces</p>
<p><small>π Upload PDFs β’ π¬ Ask Questions β’ π§ Get AI Answers β’ π Privacy First</small></p>
<br>
<p><small>β­ Like this app? Give it a star on Hugging Face Spaces!</small></p>
</div>
"""

_EXAMPLE_QUESTIONS = (
    "π What is this document about?",
    "π Summarize the main points",
    "π What are the key details?",
    "π Give me important information",
    "β What questions can I ask?",
)


def _safe_html(text):
    """Return *text* escaped for embedding in ``unsafe_allow_html`` markup.

    File names, extraction messages and chat content are untrusted (they come
    from the upload, the PDF, or the model), so they must not be able to
    inject tags. Newlines become ``<br>`` so a multi-line answer stays inside
    its enclosing ``<div>``.
    """
    from html import escape  # local import keeps this block self-contained

    return escape(str(text)).replace("\n", "<br>")


def _init_session_state():
    """Create the session-state entries the page relies on, if missing."""
    if "bot" not in st.session_state:
        st.session_state.bot = PDFChatBot()
    defaults = {"messages": [], "pdf_processed": False, "uploaded_file_name": ""}
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _render_sidebar():
    """Render the sidebar (key input, help, reset, status); return the API key."""
    with st.sidebar:
        st.markdown("### π Configuration")
        api_key = st.text_input(
            "OpenAI API Key",
            type="password",
            help="Enter your OpenAI API key to start chatting with PDFs",
            placeholder="sk-...",
        )
        if api_key:
            st.success("β API Key Provided")
        else:
            st.error("β API Key Required")

        st.markdown("---")
        st.markdown("### π How to Use")
        st.markdown(_HOW_TO_USE_MD)

        st.markdown("---")
        st.markdown("### π― Features")
        st.markdown(_FEATURES_MD)

        st.markdown("---")
        if st.button("ποΈ Clear Chat History", use_container_width=True):
            # Full reset: chat log, bot (and its extracted text), and the
            # flags that gate the chat UI.
            st.session_state.messages = []
            st.session_state.bot = PDFChatBot()
            st.session_state.pdf_processed = False
            st.session_state.uploaded_file_name = ""
            st.success("β Chat history cleared!")
            st.rerun()

        # API Key Info
        with st.expander("βΉοΈ Get OpenAI API Key"):
            st.markdown(_API_KEY_HELP_MD)

        # Status
        st.markdown("---")
        st.markdown("### π Status")
        if st.session_state.pdf_processed:
            st.success("β PDF Ready")
            st.info(f"π {st.session_state.uploaded_file_name}")
            bot = st.session_state.bot
            st.markdown("\n".join([
                "**π Document Stats:**",
                f"- Pages: {bot.pdf_pages}",
                f"- Characters: {bot.pdf_chars:,}",
                f"- Conversations: {len(bot.conversation_history)}",
            ]))
        else:
            st.warning("β³ No PDF loaded")
        if api_key:
            st.success("β API Connected")
        else:
            st.error("β API Key Missing")
    return api_key


def _render_upload_column(api_key):
    """Render the uploader, run text extraction, and show example questions."""
    st.markdown("### π€ Upload Your PDF")
    uploaded_file = st.file_uploader(
        "Choose a PDF file",
        type="pdf",
        help="Upload any PDF document (max 200MB)",
        label_visibility="collapsed",
    )
    if not uploaded_file:
        st.markdown(_UPLOAD_AREA_HTML, unsafe_allow_html=True)

    if uploaded_file and api_key:
        is_new_file = st.session_state.uploaded_file_name != uploaded_file.name
        if not st.session_state.pdf_processed or is_new_file:
            with st.spinner("π Extracting text from your PDF..."):
                success, message = st.session_state.bot.extract_pdf_text(uploaded_file)
            if success:
                st.markdown(
                    '<div class="success-message">'
                    "<h4>β PDF Processed Successfully!</h4>"
                    f"<p>{_safe_html(message)}</p>"
                    "</div>",
                    unsafe_allow_html=True,
                )
                st.session_state.pdf_processed = True
                st.session_state.uploaded_file_name = uploaded_file.name
                # Show file details
                file_size = uploaded_file.size / 1024  # KB
                bot = st.session_state.bot
                st.markdown(
                    '<div class="info-message">'
                    f"<strong>π File:</strong> {_safe_html(uploaded_file.name)}<br>"
                    f"<strong>π Size:</strong> {file_size:.1f} KB<br>"
                    f"<strong>π Pages:</strong> {bot.pdf_pages}<br>"
                    f"<strong>π Characters:</strong> {bot.pdf_chars:,}<br>"
                    "<strong>π― Status:</strong> Ready for questions!"
                    "</div>",
                    unsafe_allow_html=True,
                )
            else:
                # Do not leave the chat enabled against a previously loaded
                # document while the uploader shows a file that failed.
                st.session_state.pdf_processed = False
                st.session_state.uploaded_file_name = ""
                st.markdown(
                    '<div class="warning-message">'
                    "<h4>β οΈ Processing Failed</h4>"
                    f"<p>{_safe_html(message)}</p>"
                    "</div>",
                    unsafe_allow_html=True,
                )
    elif uploaded_file and not api_key:
        st.markdown(_API_KEY_REQUIRED_HTML, unsafe_allow_html=True)

    # Example questions
    if st.session_state.pdf_processed:
        st.markdown("### π‘ Try These Questions")
        for question in _EXAMPLE_QUESTIONS:
            if st.button(question, key=f"example_{question}", use_container_width=True):
                # Drop the leading icon, then hand the text to the chat column
                # on the next run via session state.
                st.session_state.pending_question = question.partition(" ")[2]
                st.rerun()


def _render_chat_column(api_key):
    """Render the chat transcript and input, and send a question if requested."""
    st.markdown("### π¬ Chat with Your PDF")
    processed = st.session_state.pdf_processed

    with st.container():
        if st.session_state.messages:
            for message in st.session_state.messages:
                is_user = message["role"] == "user"
                css_class = "user-message" if is_user else "ai-message"
                speaker = "π§ You:" if is_user else "π€ AI:"
                st.markdown(
                    f'<div class="{css_class}">'
                    f"<strong>{speaker}</strong> {_safe_html(message['content'])}"
                    "</div>",
                    unsafe_allow_html=True,
                )
        elif processed:
            st.markdown(_GREETING_HTML, unsafe_allow_html=True)
        else:
            st.markdown(_WELCOME_HTML, unsafe_allow_html=True)

    # Input area
    st.markdown("---")

    # Consume a question queued by an example button. The value is captured
    # BEFORE the key is removed, so the send decision below can still see that
    # an example was clicked (checking the key after deleting it can never
    # succeed).
    pending_question = None
    if "pending_question" in st.session_state:
        pending_question = st.session_state["pending_question"]
        del st.session_state["pending_question"]

    typed_question = st.text_input(
        "Ask a question about your PDF:",
        placeholder="e.g., What are the main topics discussed in this document?",
        disabled=not processed,
        key="user_input",
    )

    send_col, refresh_col, _ = st.columns([2, 1, 1])
    with send_col:
        send_button = st.button("π€ Send Message", disabled=not processed, use_container_width=True)
    with refresh_col:
        if st.button("π Refresh", disabled=not processed, use_container_width=True):
            st.rerun()

    # Process question
    from_example = pending_question is not None
    user_question = (pending_question if from_example else typed_question or "").strip()
    triggered = send_button or from_example

    if triggered and user_question and processed and api_key:
        # Add user message
        st.session_state.messages.append({"role": "user", "content": user_question})
        # Get AI response
        with st.spinner("π€ AI is analyzing your question..."):
            ai_response = st.session_state.bot.ask_openai(user_question, api_key)
        # Add AI response
        st.session_state.messages.append({"role": "assistant", "content": ai_response})
        st.rerun()
    elif triggered and not processed:
        st.warning("β οΈ Please upload and process a PDF first!")
    elif triggered and not api_key:
        st.warning("β οΈ Please enter your OpenAI API key in the sidebar!")


def main():
    """Build the whole page: header, sidebar, upload column, chat column, footer.

    Streamlit re-executes this on every interaction; anything that must
    survive between runs lives in ``st.session_state``.
    """
    _init_session_state()

    # Header
    st.markdown(_HEADER_HTML, unsafe_allow_html=True)

    # Sidebar
    api_key = _render_sidebar()

    # Main content
    upload_col, chat_col = st.columns([1, 2])
    with upload_col:
        _render_upload_column(api_key)
    with chat_col:
        _render_chat_column(api_key)

    # Footer
    st.markdown("---")
    st.markdown(_FOOTER_HTML, unsafe_allow_html=True)
# Script entry point: build the page only when this module is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()