Spaces:
Sleeping
Sleeping
# Streamlit RAG app over uploaded PDFs: imports and page configuration.
import os

import google.generativeai as genai
import streamlit as st
from PIL import Image

from utils.document_processing import process_pdf
from utils.models import load_models
from utils.rag import query_pipeline

# Configure the page. This must be the first st.* call in the script.
st.set_page_config(
    page_title="PDF RAG Pipeline",
    page_icon="📄",
    layout="wide",
)
# Initialize session state on first run so later reads never raise.
# models_loaded:   True once load_models() has succeeded this session.
# processed_docs:  dict of extracted content, or None before any upload.
if "models_loaded" not in st.session_state:
    st.session_state.models_loaded = False
if "processed_docs" not in st.session_state:
    st.session_state.processed_docs = None
# Sidebar: API keys, model choices, file upload, and model initialization.
with st.sidebar:
    st.title("Configuration")

    # API keys (password inputs so they are masked in the UI).
    groq_api_key = st.text_input("Groq API Key", type="password")
    google_api_key = st.text_input("Google API Key", type="password")

    # Model selection (single-option lists today; index=0 preselects them).
    embedding_model = st.selectbox(
        "Embedding Model",
        ["ibm-granite/granite-embedding-30m-english"],
        index=0,
    )
    llm_model = st.selectbox(
        "LLM Model",
        ["llama3-70b-8192"],
        index=0,
    )

    # PDF upload (one file at a time).
    uploaded_file = st.file_uploader(
        "Upload a PDF file",
        type=["pdf"],
        accept_multiple_files=False,
    )

    if st.button("Initialize Models"):
        with st.spinner("Loading models..."):
            try:
                # Unpack into distinct local names so the loaded LLM object
                # does not shadow the `llm_model` selectbox string above.
                embeddings, embeddings_tokenizer, vision, llm = load_models(
                    embedding_model=embedding_model,
                    llm_model=llm_model,
                    google_api_key=google_api_key,
                    groq_api_key=groq_api_key,
                )
                # Persist in session state so the models survive reruns.
                st.session_state.embeddings_model = embeddings
                st.session_state.embeddings_tokenizer = embeddings_tokenizer
                st.session_state.vision_model = vision
                st.session_state.llm_model = llm
                st.session_state.models_loaded = True
                st.success("Models loaded successfully!")
            except Exception as e:
                st.error(f"Error loading models: {str(e)}")
# Main app interface.
st.title("PDF RAG Pipeline")
st.write("Upload a PDF and ask questions about its content")

if uploaded_file and st.session_state.models_loaded:
    # Guard against reprocessing: Streamlit reruns this script top-to-bottom
    # on every interaction (e.g. clicking "Get Answer"), and without this
    # check the expensive PDF extraction would run again each time.
    if st.session_state.get("processed_file_name") != uploaded_file.name:
        with st.spinner("Processing PDF..."):
            # process_pdf expects a path, so persist the upload to disk first.
            file_path = f"./temp_{uploaded_file.name}"
            try:
                with open(file_path, "wb") as f:
                    f.write(uploaded_file.getbuffer())

                texts, tables, pictures = process_pdf(
                    file_path,
                    st.session_state.embeddings_tokenizer,
                    st.session_state.vision_model,
                )
                st.session_state.processed_docs = {
                    "texts": texts,
                    "tables": tables,
                    "pictures": pictures,
                }
                st.session_state.processed_file_name = uploaded_file.name
                st.success("PDF processed successfully!")

                # Quick stats on what was extracted.
                col1, col2, col3 = st.columns(3)
                col1.metric("Text Chunks", len(texts))
                col2.metric("Tables", len(tables))
                col3.metric("Images", len(pictures))
            except Exception as e:
                st.error(f"Error processing PDF: {str(e)}")
            finally:
                # Always clean up the temp file, even when processing fails
                # (the original removed it only on success, leaking on error).
                if os.path.exists(file_path):
                    os.remove(file_path)
# Question answering: only shown once a document has been processed.
if st.session_state.processed_docs:
    st.divider()
    st.subheader("Ask a Question")
    question = st.text_input("Enter your question about the document:")

    if question and st.button("Get Answer"):
        with st.spinner("Generating answer..."):
            try:
                # Hoist the session-state lookup; the three content lists all
                # come from the same processed_docs dict.
                docs = st.session_state.processed_docs
                answer = query_pipeline(
                    question=question,
                    texts=docs["texts"],
                    tables=docs["tables"],
                    pictures=docs["pictures"],
                    embeddings_model=st.session_state.embeddings_model,
                    llm_model=st.session_state.llm_model,
                )
                st.subheader("Answer")
                st.write(answer)
            except Exception as e:
                st.error(f"Error generating answer: {str(e)}")