Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,7 @@ import streamlit as st
|
|
| 3 |
import fitz # PyMuPDF
|
| 4 |
import openai
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
-
import
|
| 7 |
|
| 8 |
# Load the environment variables from the .env file
|
| 9 |
load_dotenv()
|
|
@@ -12,7 +12,7 @@ pinecone_api_key = os.getenv('PINECONE_API_KEY')
|
|
| 12 |
pinecone_environment = os.getenv('PINECONE_ENVIRONMENT')
|
| 13 |
|
| 14 |
# Initialize Pinecone
|
| 15 |
-
|
| 16 |
|
| 17 |
# Streamlit app
|
| 18 |
st.title("Chat with Your Document")
|
|
@@ -34,9 +34,14 @@ if uploaded_file is not None:
|
|
| 34 |
|
| 35 |
# Create a Pinecone vector store
|
| 36 |
index_name = "pdf-analysis"
|
| 37 |
-
if index_name not in
|
| 38 |
-
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
# Add the PDF text to the vector store
|
| 42 |
vector_store.upsert([(str(i), openai.Embedding.create(input=pdf_text)["data"][0]["embedding"]) for i in range(len(pdf_text))])
|
|
|
|
| 3 |
import fitz # PyMuPDF
|
| 4 |
import openai
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
+
from pinecone import Pinecone, ServerlessSpec
|
| 7 |
|
| 8 |
# Load the environment variables from the .env file
|
| 9 |
load_dotenv()
|
|
|
|
| 12 |
pinecone_environment = os.getenv('PINECONE_ENVIRONMENT')
|
| 13 |
|
| 14 |
# Initialize Pinecone
|
| 15 |
+
pc = Pinecone(api_key=pinecone_api_key)
|
| 16 |
|
| 17 |
# Streamlit app
|
| 18 |
st.title("Chat with Your Document")
|
|
|
|
| 34 |
|
| 35 |
# Create a Pinecone vector store
|
| 36 |
index_name = "pdf-analysis"
|
| 37 |
+
if index_name not in pc.list_indexes().names():
|
| 38 |
+
pc.create_index(
|
| 39 |
+
name=index_name,
|
| 40 |
+
dimension=512,
|
| 41 |
+
metric='euclidean',
|
| 42 |
+
spec=ServerlessSpec(cloud='aws', region=pinecone_environment)
|
| 43 |
+
)
|
| 44 |
+
vector_store = pc.Index(index_name)
|
| 45 |
|
| 46 |
# Add the PDF text to the vector store
|
| 47 |
vector_store.upsert([(str(i), openai.Embedding.create(input=pdf_text)["data"][0]["embedding"]) for i in range(len(pdf_text))])
|