File size: 3,185 Bytes
653f0aa
69800ee
bb116be
 
 
69800ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb116be
 
 
653f0aa
69800ee
 
 
 
 
 
 
 
 
9075076
 
2f0d46d
7d6aa6f
bb116be
 
 
7d6aa6f
 
 
bb116be
7d6aa6f
 
653f0aa
 
bb116be
653f0aa
 
7d6aa6f
 
 
 
 
 
69800ee
7d6aa6f
 
 
 
 
69800ee
 
 
 
7d6aa6f
 
 
 
653f0aa
7d6aa6f
 
 
69800ee
653f0aa
69800ee
7d6aa6f
69800ee
7d6aa6f
 
bb116be
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import openai
from utils._admin_util import create_embeddings, create_vector_store, read_pdf_data, split_data
import streamlit as st
from dotenv import load_dotenv

def validate_api_key(api_key):
    """Check whether ``api_key`` is accepted by OpenAI.

    A single small embeddings request is sent using the given key. If the
    request raises, a matching error message is shown with ``st.error``.

    Args:
        api_key: The OpenAI API key to try.

    Returns:
        ``True`` if the test request completed without raising, ``False``
        otherwise.
    """
    try:
        # One tiny embeddings call is enough to exercise the key.
        openai.OpenAI(api_key=api_key).embeddings.create(
            model="text-embedding-ada-002",
            input="test",
        )
    except openai.AuthenticationError:
        failure = "❌ Invalid API key"
    except openai.PermissionDeniedError:
        failure = "❌ Permission denied. Please check your API key's permissions"
    except Exception as e:
        # Anything else (network problems, rate limits, ...) is reported verbatim.
        failure = f"❌ API key validation error: {str(e)}"
    else:
        return True

    st.error(failure)
    return False

def main():
    """Run the Streamlit page that ingests uploaded PDFs into a vector store.

    Steps:
      1. Load environment variables (a ``.env`` file is honoured through
         ``load_dotenv``) and stop with setup instructions when
         ``OPENAI_API_KEY`` is not set.
      2. Show a multi-file PDF uploader.
      3. For every uploaded file, extract its text with ``read_pdf_data`` and
         split it with ``split_data``, collecting the chunks of all files.
      4. Build the embeddings object and the vector store from the collected
         chunks and keep the store in ``st.session_state.vector_store``.

    Any exception raised during steps 2-4 is shown on the page with
    ``st.error`` instead of propagating to the caller.
    """
    load_dotenv()

    # Page configuration is issued before any other Streamlit command so it
    # also applies when the missing-key error below is what gets rendered.
    st.set_page_config(page_title="Dump PDFs to QDrant - Vector Store")

    # The key itself is not used here; this only verifies it is configured
    # before the user starts uploading files.
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        st.error("❌ OpenAI API key not found! Please ensure it's set in the environment variables.")
        st.info("To set up your API key:")
        st.code("1. Go to Hugging Face Space settings\n2. Add OPENAI_API_KEY in Repository Secrets")
        st.stop()

    # NOTE: a live check via validate_api_key(api_key) is currently disabled.
    # Never print or log the key itself.

    st.title("Please upload your files...📁 ")
    try:
        # Upload multiple PDF files
        uploaded_files = st.file_uploader(
            "Upload PDF files", type=["pdf"], accept_multiple_files=True
        )
        if not uploaded_files:
            # Nothing uploaded, so there is nothing to process.
            return

        with st.spinner('Processing PDF files...'):
            all_chunks = []

            # Process each PDF file, accumulating the chunks of all of them
            for pdf in uploaded_files:
                st.write(f"Processing: {pdf.name}")

                # Extract text from PDF
                text = read_pdf_data(pdf)
                st.write(f"👉 Reading {pdf.name} done")

                # Create chunks for this PDF
                chunks = split_data(text)
                all_chunks.extend(chunks)
                st.write(f"👉 Splitting {pdf.name} into chunks done")

            if not all_chunks:
                st.error("❌ No valid chunks were created from the PDFs")
                st.stop()

            st.write("Creating embeddings...")
            embeddings = create_embeddings()
            st.write("👉 Creating embeddings instance done")

            # Create vector store with all chunks (presumably this is what
            # writes to Qdrant, per the success message below; confirm in
            # utils._admin_util).
            vector_store = create_vector_store(embeddings, all_chunks)
            st.session_state.vector_store = vector_store

            st.success(f"✅ Successfully processed {len(uploaded_files)} files and pushed embeddings to Qdrant")
            st.write(f"Total chunks created: {len(all_chunks)}")

    except Exception as e:
        # Top-level UI boundary: report the failure on the page.
        st.error(f"❌ An unexpected error occurred: {str(e)}")
            
            
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()