Spaces:

darsoarafa
/

g1

Build error

File size: 6,271 Bytes

import gradio as gr
import os
import time
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
#from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.document_loaders import PyPDFLoader

# Directory the process was started from; the bundled PDF paths used further
# down in this file are built relative to it.
cwd = os.getcwd()
def _loader_for(path):
    """Return the document loader matching the file extension of *path*."""
    if os.path.splitext(path)[1].lower() == ".txt":
        # Local import: keeps the file's top-level import block untouched.
        from langchain_community.document_loaders import TextLoader

        return TextLoader(path, encoding="utf-8")
    # Every other extension keeps going through the PDF loader, as before.
    return PyPDFLoader(path)


def load_doc(list_file_path):
    """Load the given documents and split them into overlapping text chunks.

    Args:
        list_file_path: Iterable of file paths. Paths ending in ``.txt``
            (case-insensitive) are read as UTF-8 plain text; all other paths
            are handed to ``PyPDFLoader``.

    Returns:
        The list of chunks produced by ``RecursiveCharacterTextSplitter``
        (chunk size 1024, overlap 64) over all loaded pages.
    """
    # Build every loader first so a bad path fails before any file is parsed.
    loaders = [_loader_for(x) for x in list_file_path]
    pages = []
    for loader in loaders:
        pages.extend(loader.load())
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1024, chunk_overlap=64
    )
    return text_splitter.split_documents(pages)

def create_db(splits):
    """Embed the document chunks and index them in a FAISS vector store.

    Args:
        splits: Document chunks to index (as returned by ``load_doc``).

    Returns:
        The FAISS vector store built from ``splits``.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(splits, embedder)

def initialize_chatbot(vector_db):
    """Create a conversational retrieval chain on top of a vector store.

    Args:
        vector_db: Vector store exposing ``as_retriever()``.

    Returns:
        A ``ConversationalRetrievalChain`` wired to a Hugging Face endpoint
        LLM, the store's retriever, and a buffer memory keyed on
        ``"chat_history"``.
    """
    chat_memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    doc_retriever = vector_db.as_retriever()

    endpoint_options = {
        "repo_id": "mistralai/Mistral-7B-Instruct-v0.2",
        # The API token is read from the environment; None when it is unset.
        "huggingfacehub_api_token": os.environ.get("HUGGINGFACE_API_TOKEN"),
        "temperature": 0.5,
        "max_new_tokens": 512,
        # Explicitly specify the task type.
        "task": "text-generation",
    }
    endpoint_llm = HuggingFaceEndpoint(**endpoint_options)

    return ConversationalRetrievalChain.from_llm(
        llm=endpoint_llm,
        retriever=doc_retriever,
        memory=chat_memory,
        verbose=False,
    )

def process_and_initialize(files):
    """Build the vector store and QA chain from the uploaded documents.

    Args:
        files: Uploaded documents. Each entry is either a filesystem path
            (``str`` / ``os.PathLike``) or an object exposing its path as
            ``.name``. ``None`` entries are skipped.

    Returns:
        A ``(vector_db, qa_chain, status_message)`` tuple. The first two items
        are ``None`` when nothing was uploaded or when processing failed; in
        the failure case the status message carries the error text.
    """
    no_upload = (None, None, "Please upload a file first.")
    if not files:
        return no_upload

    try:
        # Use the files that were actually passed in. Path strings are taken
        # as-is; file-like wrappers contribute their ``.name``.
        list_file_path = [
            file if isinstance(file, (str, os.PathLike)) else file.name
            for file in files
            if file is not None
        ]
        if not list_file_path:
            # Only ``None`` placeholders were supplied.
            return no_upload
        print(list_file_path)
        doc_splits = load_doc(list_file_path)
        db = create_db(doc_splits)
        qa = initialize_chatbot(db)
        return db, qa, "Database created! Ready for questions."
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return None, None, f"Processing error: {str(e)}"
        
def process_dokumen():
    """Build the vector store and QA chain from the two bundled PDFs.

    Returns:
        A ``(vector_db, qa_chain, status_message)`` tuple; the first two items
        are ``None`` and the message carries the error text when any step
        fails.
    """
    try:
        bundled_paths = [
            f"{cwd}/produk-aaa-00-intro.pdf",
            f"{cwd}/produk-aaa-01.pdf",
        ]
        print(bundled_paths)
        store = create_db(load_doc(bundled_paths))
        chain = initialize_chatbot(store)
    except Exception as err:
        return None, None, f"Processing error: {err!s}"
    return store, chain, "Database created! Ready for questions."
        
def user_query_typing_effect(query, qa_chain, chatbot):
    """Answer *query* and stream the reply into the chat one character at a time.

    This is a generator. Each yielded item is ``(history, "")``: the chat
    history as a list of ``{"role", "content"}`` dicts, and an empty string
    that clears the question box.

    Args:
        query: The user's question.
        qa_chain: Object exposing ``invoke({"question", "chat_history"})`` and
            returning a mapping with an ``"answer"`` key, or ``None`` when no
            documents have been processed yet.
        chatbot: Existing chat history, or ``None``/empty for a fresh chat.

    Yields:
        ``(history, "")`` after every appended character, or a single update
        for blank queries, a missing chain, an empty answer, or an error.
    """
    # Work on a copy so the caller's list is never mutated behind its back.
    history = list(chatbot) if chatbot else []

    if not query or not query.strip():
        # Nothing to ask: leave the conversation untouched.
        yield history, ""
        return

    if qa_chain is None:
        # Documents have not been processed yet; say so instead of surfacing
        # an AttributeError about None.
        history.append({
            "role": "assistant",
            "content": "Please process your documents before asking a question.",
        })
        yield history, ""
        return

    try:
        response = qa_chain.invoke({"question": query, "chat_history": []})
        assistant_response = response["answer"]
    except Exception as e:
        # UI boundary: show the failure in the chat instead of raising.
        history.append({"role": "assistant", "content": f"Error: {str(e)}"})
        yield history, ""
        return

    history.append({"role": "user", "content": query})
    history.append({"role": "assistant", "content": ""})
    # Emit once up front so the question shows even if the answer is empty.
    yield history, ""
    for char in assistant_response:
        history[-1]["content"] += char
        yield history, ""
        time.sleep(0.03)  # pacing for the typing effect

def main():
    """Build the Gradio Blocks UI, wire its events, and launch the app.

    Layout: a narrow left column (file upload, "Process Documents" button,
    status box) and a wide right column (chat window, question box, "Ask"
    button). Processing stores the vector store and QA chain in two state
    components, which the question handlers then receive as inputs.
    """
    custom_css = """
    body {
        background-color: #FF8C00;
        font-family: Arial, sans-serif;
    }
    .gradio-container {
        border-radius: 15px;
        box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.3);
        padding: 20px;
    }
    footer {
        /* visibility: hidden; */
    }
    .chatbot {
        border: 2px solid #000;
        border-radius: 10px;
        background-color: #FFF5E1;
    }
    """
    with gr.Blocks(css=custom_css) as app:
        # State slots that carry the vector store and QA chain from the
        # processing handler to the question handlers.
        vector_db = gr.State()
        qa_chain = gr.State()
        gr.Markdown("### 🌟 **PDF & TXT Chatbot** 🌟")
        gr.Markdown("#### Upload your document and ask questions interactively!")
        with gr.Row():
            with gr.Column(scale=1):
                txt_file = gr.Files(
                    label="📁 Upload Documents",
                    file_types=[".txt", ".pdf"],
                    type="filepath"
                )
                analyze_btn = gr.Button("🚀 Process Documents")
                status = gr.Textbox(
                    label="📊 Status",
                    placeholder="Status updates will appear here...",
                    interactive=False
                )
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    label="🤖 Chat with your data",
                    height=600,
                    bubble_full_width=False,
                    show_label=False,
                    render_markdown=True,
                    type="messages",
                    elem_classes=["chatbot"]
                )
                query_input = gr.Textbox(
                    label="Ask a question",
                    placeholder="Ask about the document...",
                    show_label=False,
                    container=False
                )
                query_btn = gr.Button("Ask")
        # Process the uploaded files and keep the resulting objects in state.
        analyze_btn.click(
            fn=process_and_initialize,
            inputs=[txt_file],
            outputs=[vector_db, qa_chain, status],
            show_progress="minimal",
            api_name="satu"
        )
        # Clicking "Ask" and pressing Enter in the question box trigger the
        # same handler; only the exposed API name differs.
        query_btn.click(
            fn=user_query_typing_effect,
            inputs=[query_input, qa_chain, chatbot],
            outputs=[chatbot, query_input],
            show_progress="minimal",
            api_name="dua"
        )
        query_input.submit(
            fn=user_query_typing_effect,
            inputs=[query_input, qa_chain, chatbot],
            outputs=[chatbot, query_input],
            show_progress="minimal",
            api_name="tiga"
        )

    app.launch()

# Start the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()