import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOpenAI
import os
from tempfile import NamedTemporaryFile
# Load API Key
def load_api_key():
    """Return the OpenRouter API key from the environment.

    Raises:
        ValueError: when OPENROUTER_API_KEY is not set.
    """
    key = os.environ.get("OPENROUTER_API_KEY")
    if key is not None:
        return key
    raise ValueError("API key not found in environment variables")
# Resolve the API key once at import time (fails fast if it is missing).
OPENROUTER_API_KEY=load_api_key()
# Process PDF files
def process_pdfs(files):
    """Split uploaded PDFs into chunks and build a FAISS retriever.

    Args:
        files: iterable of Gradio File objects (each exposes a ``.name`` path).

    Returns:
        A retriever over the embedded chunks (top-3 similarity search).

    Raises:
        ValueError: when no content could be extracted from the files.
    """
    # Stateless splitter configuration — shared across all files.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    all_chunks = []
    for uploaded in files:
        # Copy the upload to a private temporary file so the loader
        # gets a stable on-disk path.
        with NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            with open(uploaded.name, "rb") as src:
                tmp.write(src.read())
            tmp_path = tmp.name
        try:
            pages = PyPDFLoader(tmp_path).load()
            all_chunks.extend(splitter.split_documents(pages))
        finally:
            # Always remove the temporary copy, even if loading fails.
            os.unlink(tmp_path)
    if not all_chunks:
        raise ValueError("No content was loaded from the files")
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
    )
    store = FAISS.from_documents(all_chunks, embeddings)
    return store.as_retriever(search_kwargs={"k": 3})
# Initialize language model
def load_model():
    """Build the Mistral-7B chat client routed through OpenRouter."""
    model_config = dict(
        base_url="https://openrouter.ai/api/v1",
        api_key=OPENROUTER_API_KEY,
        model="mistralai/mistral-7b-instruct",
        temperature=0.3,
    )
    return ChatOpenAI(**model_config)
# Prompt template shared by the QA chain; {context} and {question} are
# filled in by LangChain at query time.
template = """
You are an intelligent assistant specialized in document analysis.
Use the following information from PDF files to answer the question.
Answer in the same language as the question (Arabic or English).
{context}
Question: {question}
Answer (in detail and in clear language):
"""
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template
)
# Application state variables
qa_chain = None    # RetrievalQA chain; stays None until PDFs are processed
chat_history = []  # module-level history slot (Gradio passes history explicitly)

# Function to process messages and respond
def respond(message, chat_history):
    """Answer ``message`` via the QA chain and append the turn to history.

    Returns the extended chat history; on any failure the error text is
    shown as the assistant reply instead of raising.
    """
    global qa_chain
    if qa_chain is None:
        reply = "Please upload PDF files first"
    else:
        try:
            reply = qa_chain({"query": message})["result"]
        except Exception as exc:
            reply = f"An error occurred: {str(exc)}"
    return chat_history + [(message, reply)]
# Event handling
def handle_upload(files):
    """Build the QA chain from freshly uploaded PDF files.

    Stores the chain in the module-level ``qa_chain`` consumed by
    ``respond``.

    Args:
        files: Gradio File objects from the upload widget.

    Returns:
        A status string for the upload-status textbox (never raises).
    """
    global qa_chain
    try:
        retriever = process_pdfs(files)
        llm = load_model()
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=retriever,
            chain_type="stuff",
            # Reuse the module-level `prompt`; the previous code rebuilt an
            # identical PromptTemplate here, leaving `prompt` dead.
            chain_type_kwargs={"prompt": prompt},
            return_source_documents=False
        )
        return "Files uploaded successfully!"
    except Exception as e:
        return f"Error uploading files: {str(e)}"
# UI — declarative Gradio layout; component variables created here are
# wired to the handlers defined above.
with gr.Blocks(title="Smart Document Assistant", theme=gr.themes.Default()) as demo:
    gr.Markdown("# 📄 Smart Document Assistant")
    gr.Markdown("Upload PDF files then start chatting")
    # Chat section
    chatbot = gr.Chatbot(height=500)
    # Input section
    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your question here...",
            show_label=False,
            scale=4
        )
        submit_btn = gr.Button("Send", scale=1)
    # File section
    with gr.Row():
        file_upload = gr.Files(
            label="Upload PDF files",
            file_types=[".pdf"],
            file_count="multiple"
        )
        upload_status = gr.Textbox(label="Upload Status", interactive=False)
    clear_btn = gr.Button("Clear Chat")
    # Rebuild the QA chain whenever the uploaded file set changes.
    file_upload.change(
        handle_upload,
        inputs=file_upload,
        outputs=upload_status
    )
    # Send on button click, then clear the input box.
    submit_btn.click(
        respond,
        inputs=[msg, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        None,
        [msg]
    )
    # Pressing Enter in the textbox submits the same way as the Send button.
    msg.submit(
        respond,
        inputs=[msg, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        None,
        [msg]
    )
    # Reset only the chat display; the loaded documents are kept.
    clear_btn.click(
        lambda: [],
        None,
        [chatbot]
    )
# Script entry point: serve the app on all interfaces at port 7860.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )