pratikshahp commited on
Commit
4074cd7
·
verified ·
1 Parent(s): 4a1ce91

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -0
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import fitz # PyMuPDF
3
+ from langchain_community.vectorstores import Chroma
4
+ from langchain_community.embeddings import HuggingFaceEmbeddings
5
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
6
+ from openai import OpenAI
7
+ from dotenv import load_dotenv
8
+ import os
9
+
10
# Load environment variables from a local .env file (if present) so the
# OpenAI key can be configured without hard-coding it.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")  # may be None if the key is not configured

# Initialize OpenAI client (module-level so all helpers share one client).
# NOTE(review): if api_key is None this fails only at first API call — confirm
# the deployment always provides OPENAI_API_KEY.
client = OpenAI(api_key=api_key)
16
+
17
+ # Utility Functions
18
def load_pdf(file):
    """Extract the full text of an uploaded PDF.

    Args:
        file: A file-like object (e.g. a Streamlit ``UploadedFile``) whose
            bytes form a PDF document.

    Returns:
        The concatenated text of every page, or ``""`` if the PDF could not
        be read (the error is also surfaced in the Streamlit UI).
    """
    try:
        # Open from the in-memory bytes; use a context manager so the
        # PyMuPDF document handle is always closed (the original leaked it).
        with fitz.open(stream=file.read(), filetype="pdf") as doc:
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        st.error(f"Error reading PDF: {e}")
        return ""
26
+
27
def split_text(text, chunk_size=1000, chunk_overlap=20):
    """Break *text* into overlapping chunks wrapped as LangChain documents.

    Args:
        text: The raw string to split.
        chunk_size: Maximum characters per chunk.
        chunk_overlap: Characters shared between consecutive chunks.

    Returns:
        A list of LangChain ``Document`` objects, one per chunk.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    )
    return splitter.create_documents([text])
33
+
34
def create_and_load_db(chunks, persist_directory="pdf_embeddings"):
    """Embed *chunks* into a persisted Chroma store and return a fresh handle.

    Args:
        chunks: LangChain documents to embed (e.g. from ``split_text``).
        persist_directory: On-disk location for the Chroma store.

    Returns:
        A ``Chroma`` instance re-opened from the persisted directory.
    """
    embedder = HuggingFaceEmbeddings()
    store = Chroma.from_documents(
        documents=chunks,
        embedding=embedder,
        persist_directory=persist_directory,
    )
    # Flush embeddings to disk, then re-open from the persisted directory
    # (mirrors the original behavior of returning a fresh handle).
    store.persist()
    return Chroma(persist_directory=persist_directory, embedding_function=embedder)
40
+
41
def generate_response(context, question):
    """Ask OpenAI to answer *question* grounded in *context* from the PDF.

    Args:
        context: Retrieved PDF text to ground the answer in.
        question: The user's question.

    Returns:
        The model's answer as a stripped string, or ``None`` if the API
        call fails (the error is shown in the Streamlit UI).
    """
    system_msg = {
        "role": "system",
        "content": "You are an assistant that answers questions based on PDF content.",
    }
    user_msg = {
        "role": "user",
        "content": f"Context: {context}\n\nQuestion: {question}\n\nAnswer:",
    }
    try:
        # Keep the response access inside the try so any API/parsing
        # failure is caught and reported, as in the original contract.
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",  # Replace with preferred model
            messages=[system_msg, user_msg],
            max_tokens=150,
        )
        return completion.choices[0].message.content.strip()
    except Exception as e:
        st.error(f"Error generating response: {e}")
        return None
57
+
58
+ # Main Application Logic
59
def main():
    """Streamlit entry point: upload a PDF and ask questions about it.

    Flow: read the uploaded PDF, chunk and embed it into a Chroma store,
    retrieve the most similar chunk for the user's question, and answer
    with OpenAI. All feedback is rendered through the Streamlit UI.
    """
    st.title("PDF Chatbot with OpenAI")

    # Sidebar: File upload
    uploaded_file = st.sidebar.file_uploader("Upload a PDF", type=['pdf'])
    prompt = st.text_input("Ask a Question", "")
    submitted = st.button("Submit")

    if not submitted:
        return

    # Robustness: previously a click with no file silently did nothing.
    if not uploaded_file:
        st.warning("Please upload a PDF before submitting.")
        return

    pdf_text = load_pdf(uploaded_file)
    if not pdf_text:
        st.error("Unable to extract text from the PDF.")
        return

    st.write("PDF Content Loaded!")
    chunks = split_text(pdf_text)
    # NOTE(review): the PDF is re-embedded on every submit; consider caching
    # the vector store in st.session_state for large documents.
    vectordb = create_and_load_db(chunks)

    # Robustness: previously an empty question silently did nothing after
    # embedding the whole document.
    if not prompt:
        st.warning("Please enter a question.")
        return

    docs = vectordb.similarity_search(prompt)
    if docs:
        # Ground the answer in the single most similar chunk.
        context = docs[0].page_content
        response = generate_response(context, prompt)
        st.subheader("Generated Answer:")
        st.write(response)
    else:
        st.warning("No relevant information found.")
86
+
87
# Script entry point: launch the Streamlit app when run directly.
if __name__ == "__main__":
    main()