Spaces:
Build error
Build error
File size: 6,271 Bytes
3098327 5287278 e0f824d 58bb824 5287278 fbbc2a0 eaf02ec 5287278 15814d6 3098327 15814d6 1e8a0ae e7ad22b 15814d6 0a25c4f 23fe62d 0a25c4f 15814d6 37e554b 15814d6 150d864 15814d6 eaf02ec 1e75ee2 15814d6 e7ad22b 15814d6 e7ad22b 15814d6 e7ad22b 15814d6 b608d50 1e75ee2 1601212 15814d6 ec7f498 37e554b 2729042 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 | import gradio as gr
import os
import time
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
#from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.document_loaders import PyPDFLoader
# Working directory at import time; the bundled PDF paths in this file are
# built relative to it.
# NOTE: the former module-level `global vector_db` / `global qa_chain`
# statements were dropped -- `global` has no effect outside a function body.
cwd = os.getcwd()
def load_doc(list_file_path):
    """Load the given documents and split them into overlapping text chunks.

    Files ending in ``.txt`` (case-insensitive) are read with ``TextLoader``;
    every other path is handed to ``PyPDFLoader`` as before. The upload widget
    in this file accepts both extensions, so both must be loadable here.

    Args:
        list_file_path: Iterable of file paths to load.

    Returns:
        The list of document chunks produced by
        ``RecursiveCharacterTextSplitter`` (chunk_size=1024, chunk_overlap=64).

    Raises:
        Whatever the underlying loaders raise for unreadable or missing files.
    """
    pages = []
    for path in list_file_path:
        if os.path.splitext(str(path))[1].lower() == ".txt":
            # Imported lazily so PDF-only runs never depend on this symbol.
            from langchain_community.document_loaders import TextLoader

            loader = TextLoader(path, encoding="utf-8")
        else:
            loader = PyPDFLoader(path)
        pages.extend(loader.load())

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1024, chunk_overlap=64
    )
    return text_splitter.split_documents(pages)
def create_db(splits):
    """Embed the document chunks and index them in a FAISS vector store.

    Args:
        splits: Document chunks, passed straight to ``FAISS.from_documents``.

    Returns:
        The FAISS vector store built from ``splits`` using the
        ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.
    """
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(splits, embedding_model)
def initialize_chatbot(vector_db):
    """Assemble a conversational retrieval chain on top of ``vector_db``.

    Args:
        vector_db: Vector store exposing ``as_retriever()``.

    Returns:
        A ``ConversationalRetrievalChain`` wired to a Hugging Face endpoint
        LLM, the store's retriever, and a buffer memory keyed on
        ``chat_history``.
    """
    chat_memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    doc_retriever = vector_db.as_retriever()

    # The API token is read from the environment; it is None when unset.
    endpoint_llm = HuggingFaceEndpoint(
        repo_id="mistralai/Mistral-7B-Instruct-v0.2",
        huggingfacehub_api_token=os.getenv("HUGGINGFACE_API_TOKEN"),
        temperature=0.5,
        max_new_tokens=512,
        task="text-generation",  # Explicitly specify the task type
    )

    return ConversationalRetrievalChain.from_llm(
        llm=endpoint_llm,
        retriever=doc_retriever,
        memory=chat_memory,
        verbose=False,
    )
def process_and_initialize(files):
    """Build the vector store and QA chain from the uploaded documents.

    Args:
        files: Uploaded files: an iterable whose items are either path
            strings or objects exposing the path as ``.name``. ``None``
            items are skipped.

    Returns:
        A ``(vector_db, qa_chain, status_message)`` tuple. The first two
        items are ``None`` when nothing usable was uploaded or when
        processing failed; the message then explains why.
    """
    if not files:
        return None, None, "Please upload a file first."

    try:
        # Use what the user actually uploaded. Plain path strings are taken
        # as-is; anything else is expected to carry its path in `.name`.
        list_file_path = [
            entry if isinstance(entry, str) else entry.name
            for entry in files
            if entry is not None
        ]
        if not list_file_path:
            return None, None, "Please upload a file first."

        print(list_file_path)
        doc_splits = load_doc(list_file_path)
        db = create_db(doc_splits)
        qa = initialize_chatbot(db)
        return db, qa, "Database created! Ready for questions."
    except Exception as e:
        # UI boundary: report the failure in the status box instead of crashing.
        return None, None, f"Processing error: {str(e)}"
def process_dokumen():
    """Index the two bundled product PDFs and build the QA chain for them.

    Returns:
        A ``(vector_db, qa_chain, status_message)`` tuple; on any exception
        the first two items are ``None`` and the message carries the error.
    """
    try:
        pdf_paths = [
            f"{cwd}/produk-aaa-00-intro.pdf",
            f"{cwd}/produk-aaa-01.pdf",
        ]
        print(pdf_paths)
        store = create_db(load_doc(pdf_paths))
        chain = initialize_chatbot(store)
    except Exception as err:
        return None, None, f"Processing error: {str(err)}"
    else:
        return store, chain, "Database created! Ready for questions."
def user_query_typing_effect(query, qa_chain, chatbot):
    """Answer ``query`` and stream the reply one character at a time.

    Args:
        query: The user's question.
        qa_chain: Object with ``invoke(dict)`` returning a mapping that has an
            ``"answer"`` key, or ``None`` when no documents were processed yet.
        chatbot: Existing chat history as a list of
            ``{"role": ..., "content": ...}`` dicts, or ``None``.

    Yields:
        ``(history, "")`` tuples: the updated history and an empty string
        (wired to the query textbox in this file). At least one tuple is
        always yielded, so the UI is updated even for empty answers or errors.
    """
    # Work on a copy so the caller's list is not mutated behind its back.
    history = list(chatbot) if chatbot else []
    # Record the question first so it stays visible even if answering fails.
    history.append({"role": "user", "content": query})

    if qa_chain is None:
        history.append({
            "role": "assistant",
            "content": "Error: no documents have been processed yet. "
                       "Please process your documents first.",
        })
        yield history, ""
        return

    try:
        response = qa_chain.invoke({"question": query, "chat_history": []})
        assistant_response = response["answer"]
    except Exception as e:
        # UI boundary: show the failure in the chat instead of crashing.
        history.append({"role": "assistant", "content": f"Error: {str(e)}"})
        yield history, ""
        return

    history.append({"role": "assistant", "content": ""})
    if not assistant_response:
        # Nothing to type out; still emit one update.
        yield history, ""
        return

    for char in assistant_response:
        history[-1]["content"] += char
        yield history, ""
        time.sleep(0.03)  # pacing for the typing effect
def main():
    """Build the Gradio interface, wire up its event handlers and launch it.

    Layout: a narrow column with the document upload, a "process" button and
    a read-only status box, next to a wide column with the chat window, the
    question box and an "Ask" button.
    """
    # The CSS text is kept exactly as found in the source.
    custom_css = """
body {
background-color: #FF8C00;
font-family: Arial, sans-serif;
}
.gradio-container {
border-radius: 15px;
box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.3);
padding: 20px;
}
footer {
/* visibility: hidden; */
}
.chatbot {
border: 2px solid #000;
border-radius: 10px;
background-color: #FFF5E1;
}
"""
    with gr.Blocks(css=custom_css) as app:
        # State slots written by the "process" handler and read by the
        # question handlers below.
        vector_db = gr.State()
        qa_chain = gr.State()

        # NOTE(review): the "π" characters in the labels below look like
        # mis-decoded emoji -- restore them from the original file if possible.
        gr.Markdown("### π **PDF & TXT Chatbot** π")
        gr.Markdown("#### Upload your document and ask questions interactively!")

        with gr.Row():
            with gr.Column(scale=1):
                txt_file = gr.Files(
                    label="π Upload Documents",
                    file_types=[".txt", ".pdf"],
                    type="filepath"
                )
                analyze_btn = gr.Button("π Process Documents")
                status = gr.Textbox(
                    label="π Status",
                    placeholder="Status updates will appear here...",
                    interactive=False
                )
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    label="π€ Chat with your data",
                    height=600,
                    bubble_full_width=False,
                    show_label=False,
                    render_markdown=True,
                    type="messages",
                    elem_classes=["chatbot"]
                )
                query_input = gr.Textbox(
                    label="Ask a question",
                    placeholder="Ask about the document...",
                    show_label=False,
                    container=False
                )
                query_btn = gr.Button("Ask")

        # Process the uploaded documents into a vector store + QA chain.
        analyze_btn.click(
            fn=process_and_initialize,
            inputs=[txt_file],
            outputs=[vector_db, qa_chain, status],
            show_progress="minimal",
            api_name="satu"
        )
        # Asking via the button and via Enter in the textbox behave the same;
        # the second output clears the question box.
        query_btn.click(
            fn=user_query_typing_effect,
            inputs=[query_input, qa_chain, chatbot],
            outputs=[chatbot, query_input],
            show_progress="minimal",
            api_name="dua"
        )
        query_input.submit(
            fn=user_query_typing_effect,
            inputs=[query_input, qa_chain, chatbot],
            outputs=[chatbot, query_input],
            show_progress="minimal",
            api_name="tiga"
        )

    app.launch()
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|