File size: 5,217 Bytes
e6a3b60
91e0ac4
 
 
 
 
 
 
 
 
e6a3b60
91e0ac4
 
 
 
 
6fee022
91e0ac4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6a3b60
91e0ac4
 
 
 
6fee022
91e0ac4
 
 
e6a3b60
91e0ac4
 
 
 
4da1bda
91e0ac4
 
 
 
e6a3b60
91e0ac4
 
 
 
e6a3b60
91e0ac4
 
 
e6a3b60
91e0ac4
 
 
 
 
 
 
 
 
 
 
 
 
e6a3b60
3d4cb20
91e0ac4
3d4cb20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6a3b60
91e0ac4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d4cb20
91e0ac4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6a3b60
 
91e0ac4
 
 
3a62bac
91e0ac4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOpenAI
import os
from tempfile import NamedTemporaryFile

# Load API Key
def load_api_key():
    """Return the OpenRouter API key from the environment.

    Returns:
        str: the value of the OPENROUTER_API_KEY environment variable.

    Raises:
        ValueError: if OPENROUTER_API_KEY is not set.
    """
    # Single dict lookup instead of the membership-test-then-getenv
    # (LBYL) double lookup in the original.
    key = os.environ.get("OPENROUTER_API_KEY")
    if key is None:
        raise ValueError("API key not found in environment variables")
    return key
OPENROUTER_API_KEY=load_api_key()
# Process PDF files
def process_pdfs(files):
    """Index the uploaded PDF files and return a top-3 FAISS retriever.

    Each upload is copied to a temporary ``.pdf`` on disk (PyPDFLoader
    requires a real filesystem path), loaded page by page, and split into
    overlapping chunks before being embedded and indexed.

    Raises:
        ValueError: if no text could be extracted from any file.
    """
    # The splitter is configuration-only, so build it once up front.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )

    collected = []
    for uploaded in files:  # each item is a Gradio File object
        # Materialize the upload as a real .pdf path for PyPDFLoader.
        with NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            with open(uploaded.name, "rb") as src:
                tmp.write(src.read())
            temp_path = tmp.name

        try:
            document_pages = PyPDFLoader(temp_path).load()
            collected.extend(splitter.split_documents(document_pages))
        finally:
            # Always remove the temporary copy, even if loading failed.
            os.unlink(temp_path)

    if not collected:
        raise ValueError("No content was loaded from the files")

    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
    )
    index = FAISS.from_documents(collected, embedder)
    return index.as_retriever(search_kwargs={"k": 3})

# Initialize language model
def load_model():
    """Create the OpenRouter-backed chat model used for answering questions."""
    model_settings = {
        "base_url": "https://openrouter.ai/api/v1",
        "api_key": OPENROUTER_API_KEY,
        "model": "mistralai/mistral-7b-instruct",
        # Low temperature keeps answers grounded in the retrieved context.
        "temperature": 0.3,
    }
    return ChatOpenAI(**model_settings)

# Prompt template
# Fills {context} with retrieved PDF chunks and {question} with the user
# message; instructs the model to answer in the question's language
# (Arabic or English).
template = """
You are an intelligent assistant specialized in document analysis.
Use the following information from PDF files to answer the question:
answer dependent on the language question arabic or english  
{context}
Question: {question}
Answer (in detail and in clear language):
"""

# NOTE(review): `prompt` is not referenced anywhere in this file —
# handle_upload() builds its own identical PromptTemplate inline.
# Confirm which one is intended before editing either copy.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template
)

# Application state variables
qa_chain = None      # RetrievalQA chain; populated after PDFs are uploaded
chat_history = []    # module-level default; Gradio passes history per call

# Function to process messages and respond
def respond(message, chat_history):
    """Return the chat history extended with the answer to `message`.

    If no documents have been indexed yet, the reply asks the user to
    upload PDFs; chain errors are reported as chat text so the UI
    keeps working.
    """
    global qa_chain

    # No chain means no documents were indexed yet.
    if qa_chain is None:
        reply = "Please upload PDF files first"
    else:
        try:
            reply = qa_chain({"query": message})["result"]
        except Exception as exc:
            reply = f"An error occurred: {str(exc)}"

    return chat_history + [(message, reply)]

    # Event handling
def handle_upload(files):
        global qa_chain
        try:
            retriever = process_pdfs(files)
            llm = load_model()
            
            qa_chain = RetrievalQA.from_chain_type(
                llm=llm,
                retriever=retriever,
                chain_type="stuff",
                chain_type_kwargs={
                    "prompt": PromptTemplate(
                        template=template,
                        input_variables=["context", "question"]
                    )
                },
                return_source_documents=False
            )
            return "Files uploaded successfully!"
        except Exception as e:
            return f"Error uploading files: {str(e)}"

# UI
# Layout and event wiring for the Gradio app; `demo` is launched from the
# __main__ guard below.
with gr.Blocks(title="Smart Document Assistant", theme=gr.themes.Default()) as demo:
    gr.Markdown("# 📄 Smart Document Assistant")
    gr.Markdown("Upload PDF files then start chatting")
    
    # Chat section
    chatbot = gr.Chatbot(height=500)
    
    # Input section
    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your question here...",
            show_label=False,
            scale=4
        )
        submit_btn = gr.Button("Send", scale=1)
    
    # File section
    with gr.Row():
        file_upload = gr.Files(
            label="Upload PDF files",
            file_types=[".pdf"],
            file_count="multiple"
        )
        upload_status = gr.Textbox(label="Upload Status", interactive=False)
    
    clear_btn = gr.Button("Clear Chat")
    

    
    # Rebuild the QA chain whenever the file selection changes; the
    # returned status string is shown in the status textbox.
    file_upload.change(
        handle_upload,
        inputs=file_upload,
        outputs=upload_status
    )
    
    # Send button: answer the message, then clear the input textbox.
    submit_btn.click(
        respond,
        inputs=[msg, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        None,
        [msg]
    )
    
    # Enter key in the textbox behaves the same as the Send button.
    msg.submit(
        respond,
        inputs=[msg, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        None,
        [msg]
    )
    
    # Reset the visible chat transcript (does not reset qa_chain).
    clear_btn.click(
        lambda: [],
        None,
        [chatbot]
    )

if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from outside a
    # container; 7860 is Gradio's conventional port.
    demo.launch(server_name="0.0.0.0", server_port=7860)