import os

import openai
import streamlit as st
from dotenv import load_dotenv

from utils._admin_util import (
    create_embeddings,
    create_vector_store,
    read_pdf_data,
    split_data,
)


def validate_api_key(api_key, model="text-embedding-ada-002"):
    """Check whether the OpenAI API accepts ``api_key``.

    Sends a single minimal embeddings request. Any failure is reported to
    the page with ``st.error`` instead of being raised.

    Args:
        api_key: The OpenAI API key to test.
        model: Embedding model used for the probe request.

    Returns:
        True if the probe request succeeded, False otherwise.
    """
    try:
        client = openai.OpenAI(api_key=api_key)
        client.embeddings.create(input="test", model=model)
    except openai.AuthenticationError:
        st.error("❌ Invalid API key")
        return False
    except openai.PermissionDeniedError:
        st.error("❌ Permission denied. Please check your API key's permissions")
        return False
    except Exception as e:
        # Anything else (network failure, rate limit, ...) also counts as
        # "not validated"; show the reason rather than crash the page.
        st.error(f"❌ API key validation error: {e}")
        return False
    return True


def _collect_chunks(uploaded_files):
    """Read and split every uploaded PDF; return all chunks in upload order."""
    all_chunks = []
    for pdf in uploaded_files:
        st.write(f"Processing: {pdf.name}")

        text = read_pdf_data(pdf)
        st.write(f"👉 Reading {pdf.name} done")

        all_chunks.extend(split_data(text))
        st.write(f"👉 Splitting {pdf.name} into chunks done")
    return all_chunks


def main():
    """Render the admin page: upload PDFs, chunk them, push them to the vector store.

    Flow:
      1. Load environment variables and require ``OPENAI_API_KEY``; if it is
         missing, show setup instructions and stop the script.
      2. Accept one or more PDF uploads.
      3. Read and split each PDF, create the embeddings instance, build the
         vector store from all chunks and keep it in
         ``st.session_state.vector_store``.

    Unexpected errors raised while processing are shown with ``st.error``.
    """
    load_dotenv()

    # Streamlit documents set_page_config as the first Streamlit command on a
    # page, so it has to come before the error/info widgets emitted below.
    st.set_page_config(page_title="Dump PDFs to QDrant - Vector Store")

    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        st.error("❌ OpenAI API key not found! Please ensure it's set in the environment variables.")
        st.info("To set up your API key:")
        st.code("1. Go to Hugging Face Space settings\n2. Add OPENAI_API_KEY in Repository Secrets")
        st.stop()

    # Live key validation is currently switched off; uncomment to enable.
    # Note that it sends an embeddings request each time the script runs.
    # if not validate_api_key(api_key):
    #     st.stop()

    st.title("Please upload your files...📁 ")

    try:
        uploaded_files = st.file_uploader(
            "Upload PDF files",
            type=["pdf"],
            accept_multiple_files=True,
        )
        if not uploaded_files:
            return

        with st.spinner('Processing PDF files...'):
            all_chunks = _collect_chunks(uploaded_files)

            if not all_chunks:
                st.error("❌ No valid chunks were created from the PDFs")
                st.stop()

            st.write("Creating embeddings...")
            embeddings = create_embeddings()
            st.write("👉 Creating embeddings instance done")

            # One vector store is built from the chunks of all uploaded PDFs.
            vector_store = create_vector_store(embeddings, all_chunks)
            st.session_state.vector_store = vector_store

        st.success(f"✅ Successfully processed {len(uploaded_files)} files and pushed embeddings to Qdrant")
        st.write(f"Total chunks created: {len(all_chunks)}")

    except Exception as e:
        # Top-level UI boundary: surface the failure on the page instead of
        # showing a raw traceback.
        st.error(f"❌ An unexpected error occurred: {e}")


if __name__ == '__main__':
    main()