Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from llama_index.core import VectorStoreIndex, Document
|
| 3 |
+
from llama_index.llms.openai import OpenAI
|
| 4 |
+
from llama_index.core import Settings
|
| 5 |
+
import os
|
| 6 |
+
import pdfplumber
|
| 7 |
+
from docx import Document as DocxDocument
|
| 8 |
+
import json
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
|
| 11 |
+
# Global variables shared by the Gradio callbacks below.
# chat_engine: llama-index chat engine built by load_data(); stays None until
# documents have been uploaded and indexed.
chat_engine = None
# conversation_history: list of {"role": ..., "content": ...} dicts for the
# current session; appended by chat_with_docs(), persisted by save_conversation().
conversation_history = []
|
| 14 |
+
|
| 15 |
+
# Function to read PDF files
def read_pdf(file_path):
    """Extract the text of every page of a PDF.

    Args:
        file_path: Path to the PDF file on disk.

    Returns:
        The concatenated text of all pages, with a newline after each page.
    """
    with pdfplumber.open(file_path) as pdf:
        # pdfplumber's Page.extract_text() returns None for pages with no
        # extractable text (e.g. scanned images); treat those as empty so we
        # don't crash with a TypeError when concatenating.
        return ''.join((page.extract_text() or '') + '\n' for page in pdf.pages)
|
| 22 |
+
|
| 23 |
+
# Function to read DOCX files
def read_docx(file_path):
    """Return the text of a .docx file, one paragraph per line.

    Args:
        file_path: Path to the .docx file on disk.

    Returns:
        All paragraph texts concatenated, each followed by a newline.
    """
    document = DocxDocument(file_path)
    lines = [paragraph.text + '\n' for paragraph in document.paragraphs]
    return ''.join(lines)
|
| 30 |
+
|
| 31 |
+
# Function to load and index documents
def load_data(files, api_key):
    """Read the uploaded files, build a vector index, and create the chat engine.

    Args:
        files: Uploaded Gradio file objects; only .pdf and .docx are supported,
            other extensions are skipped.
        api_key: OpenAI API key, used for both embeddings and chat.

    Returns:
        A human-readable status message describing success or failure.
    """
    global chat_engine

    if not files or not api_key:
        return "Please provide both API key and files to proceed."

    try:
        docs = []
        for file in files:
            # Compare extensions case-insensitively so uploads like ".PDF"
            # or ".Docx" are not silently ignored.
            name = file.name.lower()
            if name.endswith('.pdf'):
                docs.append(Document(text=read_pdf(file.name)))
            elif name.endswith('.docx'):
                docs.append(Document(text=read_docx(file.name)))

        if not docs:
            # Every uploaded file had an unsupported extension; indexing an
            # empty document list would be a confusing no-op.
            return "No supported documents found. Please upload .pdf or .docx files."

        # Set OpenAI API key in the environment for libraries that read it there
        # (e.g. the embedding model used by VectorStoreIndex).
        os.environ["OPENAI_API_KEY"] = api_key

        Settings.llm = OpenAI(
            model="gpt-3.5-turbo",
            temperature=0.5,
            api_key=api_key,
            system_prompt="You are an expert on the Streamlit Python library and your job is to answer technical questions. Assume that all questions are related to the Streamlit Python library. Keep your answers technical and based on facts – do not hallucinate features."
        )

        index = VectorStoreIndex.from_documents(docs)
        # condense_question mode rewrites follow-up questions into standalone
        # queries before retrieval.
        chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)

        return "Documents loaded and indexed successfully! You can now start chatting."
    except Exception as e:
        return f"Error loading documents: {str(e)}"
|
| 64 |
+
|
| 65 |
+
# Function to handle chat
def chat_with_docs(message, history, api_key):
    """Answer *message* against the indexed documents and extend the chat history.

    Args:
        message: The user's question.
        history: List of (user, assistant) tuples shown in the Chatbot widget.
        api_key: OpenAI API key; required before any chat is attempted.

    Returns:
        A new history list with one (message, reply-or-error) tuple appended;
        the input *history* list is not mutated.
    """
    global chat_engine, conversation_history

    if not api_key:
        # Pair the user's message with the error text, consistent with the
        # other error branches below (previously the error was shown in the
        # user bubble with an empty assistant reply).
        return history + [(message, "Please enter your OpenAI API key first.")]

    if chat_engine is None:
        return history + [(message, "Please upload and load documents first before asking questions.")]

    try:
        response = chat_engine.chat(message)
        # Record the exchange so save_conversation() can persist it.
        conversation_history.append({"role": "user", "content": message})
        conversation_history.append({"role": "assistant", "content": response.response})

        return history + [(message, response.response)]
    except Exception as e:
        return history + [(message, f"Error: {str(e)}")]
|
| 83 |
+
|
| 84 |
+
# Function to save conversation
def save_conversation():
    """Append the current conversation to conversations.json with a timestamp.

    The file is JSON-lines formatted: one conversation object per line.

    Returns:
        A human-readable status message.
    """
    global conversation_history

    if not conversation_history:
        return "No conversation to save."

    try:
        record = {
            "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
            "messages": conversation_history,
        }
        # Append mode keeps earlier conversations; one JSON object per line.
        with open("conversations.json", "a") as f:
            f.write(json.dumps(record))
            f.write("\n")
        return "Conversation saved successfully!"
    except Exception as e:
        return f"Error saving conversation: {str(e)}"
|
| 103 |
+
|
| 104 |
+
# Function to load previous conversations
def load_conversations():
    """Read conversations.json (JSON-lines) and format it for display.

    Returns:
        A formatted transcript of all saved conversations, a "none found"
        message, or an error description.
    """
    if os.path.exists("conversations.json"):
        try:
            with open("conversations.json", "r") as f:
                # Skip blank lines so a stray empty line (easy to produce with
                # append-mode writes) can't crash json.loads.
                conversations = [json.loads(line) for line in f if line.strip()]

            conv_text = ""
            for i, conv in enumerate(conversations):
                conv_text += f"\n{'='*50}\nConversation {i + 1}\n{'='*50}\n"
                timestamp = conv.get("timestamp", "Unknown time")
                conv_text += f"Timestamp: {timestamp}\n\n"

                messages = conv.get("messages", conv)  # Handle old format
                for message in messages:
                    role = message.get('role', 'unknown')
                    content = message.get('content', '')
                    conv_text += f"{role.upper()}: {content}\n\n"

            return conv_text if conv_text else "No previous conversations found."
        except Exception as e:
            return f"Error loading conversations: {str(e)}"
    return "No previous conversations found."
|
| 127 |
+
|
| 128 |
+
# Function to clear current conversation
def clear_conversation():
    """Reset the in-memory conversation history and empty the chat widget.

    Returns:
        An empty list, used by Gradio to clear the Chatbot component.
    """
    global conversation_history
    conversation_history = []
    return conversation_history
|
| 133 |
+
|
| 134 |
+
# Function to delete all conversations
def delete_all_conversations():
    """Remove the conversations.json history file, if present.

    Returns:
        A human-readable status message.
    """
    try:
        # Guard clause: nothing to do when no history file exists.
        if not os.path.exists("conversations.json"):
            return "No conversations to delete."
        os.remove("conversations.json")
        return "All conversations deleted successfully!"
    except Exception as e:
        return f"Error deleting conversations: {str(e)}"
|
| 143 |
+
|
| 144 |
+
# Create Gradio interface.
# NOTE: statement order inside gr.Blocks defines both the visual layout and
# the event wiring, so the structure below is order-sensitive.
with gr.Blocks(title="Chat with Documents 💬 📚") as demo:
    gr.Markdown("# Chat with Documents 💬 📚")
    gr.Markdown("Upload PDF or DOCX files and chat with them using AI!")

    # Top row: API key entry, file upload, and indexing controls.
    with gr.Row():
        with gr.Column(scale=2):
            api_key_input = gr.Textbox(
                label="OpenAI API Key",
                type="password",
                placeholder="Enter your OpenAI API key here..."
            )

            file_upload = gr.File(
                label="Upload PDF or DOCX files",
                file_count="multiple",
                file_types=[".pdf", ".docx"]
            )

            load_btn = gr.Button("Load Documents", variant="primary")
            load_status = gr.Textbox(label="Status", interactive=False)

            # Build the vector index and chat engine from the uploads.
            load_btn.click(
                fn=load_data,
                inputs=[file_upload, api_key_input],
                outputs=load_status
            )

    # Main row: chat area (left) and saved-conversation sidebar (right).
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="Chat", height=400)
            msg = gr.Textbox(
                label="Your Question",
                placeholder="Ask a question about your documents..."
            )

            with gr.Row():
                submit_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("Clear Chat")

            with gr.Row():
                save_btn = gr.Button("Save Conversation")
                save_status = gr.Textbox(label="Save Status", interactive=False)

        with gr.Column(scale=1):
            gr.Markdown("### Previous Conversations")
            load_convs_btn = gr.Button("Load Previous Conversations")
            convs_display = gr.Textbox(
                label="Conversation History",
                lines=20,
                interactive=False
            )
            delete_all_btn = gr.Button("Delete All Conversations", variant="stop")
            delete_status = gr.Textbox(label="Delete Status", interactive=False)

    # Event handlers
    # Send button: answer the question, then clear the input box.
    submit_btn.click(
        fn=chat_with_docs,
        inputs=[msg, chatbot, api_key_input],
        outputs=chatbot
    ).then(
        lambda: "",
        outputs=msg
    )

    # Pressing Enter in the textbox mirrors the Send button.
    msg.submit(
        fn=chat_with_docs,
        inputs=[msg, chatbot, api_key_input],
        outputs=chatbot
    ).then(
        lambda: "",
        outputs=msg
    )

    clear_btn.click(
        fn=clear_conversation,
        outputs=chatbot
    )

    save_btn.click(
        fn=save_conversation,
        outputs=save_status
    )

    load_convs_btn.click(
        fn=load_conversations,
        outputs=convs_display
    )

    delete_all_btn.click(
        fn=delete_all_conversations,
        outputs=delete_status
    )
|
| 237 |
+
|
| 238 |
+
if __name__ == "__main__":
    # Launch the Gradio server when run as a script (not on import).
    demo.launch()
|