File size: 4,825 Bytes
c1244e1
dddbc2e
 
c3f97cb
dddbc2e
c3f97cb
dddbc2e
 
 
694a2d1
e4ca284
dddbc2e
e4ca284
c8aea5d
 
e4ca284
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3d30d1
e4ca284
dddbc2e
e4ca284
 
 
 
694a2d1
dddbc2e
e4ca284
 
 
dddbc2e
 
 
e4ca284
dddbc2e
 
e4ca284
 
 
 
 
 
dddbc2e
 
e4ca284
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import os
import tempfile

import streamlit as st
from streamlit_chat import message
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain_g4f import G4FLLM
from g4f import Provider, models

# Configuration
# Where the FAISS index built from the uploaded PDF is persisted on disk.
DB_FAISS_PATH = 'vectorstore/db_faiss'
# Sentence-transformers model used to embed PDF chunks (runs on CPU below).
EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
# g4f model/provider pair handed to G4FLLM in load_llm().
LLM_MODEL = models.gpt_35_long 
LLM_PROVIDER = Provider.OpenaiChat

# UI Configuration
def configure_ui():
    """Configure Streamlit UI settings"""
    st.set_page_config(page_title="Zendo AI Assistant", page_icon="📄")
    hide_streamlit_style = """
    <style>
    #MainMenu {visibility: hidden;}
    footer {visibility: hidden;}
    .stTextInput input {font-size: 16px;}
    </style>
    """
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# Initialize session state
def init_session_state():
    """Seed st.session_state with the keys the chat UI reads, once per session."""
    defaults = {
        # (user question, answer) pairs fed back into the retrieval chain.
        'history': [],
        # Bot-side transcript; starts with a greeting bubble.
        'generated': ["こんにちは!Zendoアシスタントです。PDFの内容について何でも聞いてください 🤗"],
        # User-side transcript, kept index-aligned with 'generated'.
        'past': ["ようこそ!"],
    }
    for key, initial in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = initial

# Load LLM model
def load_llm():
    """Build the g4f-backed LLM wrapper used by the retrieval chain."""
    llm = G4FLLM(model=LLM_MODEL, provider=LLM_PROVIDER)
    return llm

# Process uploaded PDF
def process_pdf(uploaded_file):
    """Index an uploaded PDF into a FAISS vector store.

    Writes the upload to a temp file (PyPDFLoader needs a filesystem path),
    loads and embeds its pages on CPU, persists the index to DB_FAISS_PATH,
    and returns the in-memory FAISS store.

    Args:
        uploaded_file: Streamlit UploadedFile holding the PDF bytes.

    Returns:
        The FAISS vector store built from the document's pages.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
        tmpfile.write(uploaded_file.getvalue())
        tmpfile_path = tmpfile.name

    try:
        loader = PyPDFLoader(tmpfile_path)
        pdf_data = loader.load()
    finally:
        # BUG FIX: delete=False meant the temp file was never removed,
        # leaking one copy of the PDF per upload. load() has fully read the
        # file by now, so it is safe to unlink it here.
        os.remove(tmpfile_path)

    embeddings = HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL,
        model_kwargs={'device': 'cpu'}
    )

    db = FAISS.from_documents(pdf_data, embeddings)
    db.save_local(DB_FAISS_PATH)
    return db

# Chat function
def conversational_chat(query, chain):
    """Answer *query* via *chain*, threading and extending the stored history."""
    history = st.session_state['history']
    result = chain({"question": query, "chat_history": history})
    answer = result["answer"]
    # Appending to the session list keeps context for the next question.
    history.append((query, answer))
    return answer

# Main application
def main():
    """Render the PDF-chat page: upload, (cached) indexing, and chat loop."""
    configure_ui()
    init_session_state()

    st.title("📄 Zendo AI Assistant - PDFチャットボット")

    # Language selection
    # NOTE(review): `language` is currently unused downstream — the prompts
    # and UI strings are fixed; wire it into the chain when localizing.
    col1, col2 = st.columns([1, 3])
    with col1:
        language = st.selectbox("言語/Language", ["日本語", "English", "Tiếng Việt"])

    # Upload PDF
    uploaded_file = st.file_uploader(
        "PDFファイルをアップロードしてください (Upload PDF file)", 
        type="pdf",
        help="PDFをアップロードすると、その内容について質問できます"
    )

    if uploaded_file:
        # BUG FIX: Streamlit reruns this script on every interaction, so the
        # original code re-parsed and re-embedded the PDF on every chat
        # message. Cache the built chain in session state, keyed by the
        # file's identity, and rebuild only when a different PDF arrives.
        file_key = (uploaded_file.name, uploaded_file.size)
        if st.session_state.get('pdf_key') != file_key:
            with st.spinner("PDFを処理中...少々お待ちください"):
                db = process_pdf(uploaded_file)
                llm = load_llm()
                st.session_state['chain'] = ConversationalRetrievalChain.from_llm(
                    llm=llm, 
                    retriever=db.as_retriever()
                )
                st.session_state['pdf_key'] = file_key
                st.success("PDFの処理が完了しました!質問をどうぞ")
        chain = st.session_state['chain']

        # Chat interface
        response_container = st.container()

        with st.form(key='chat_form', clear_on_submit=True):
            user_input = st.text_input(
                "メッセージを入力...", 
                key='input',
                placeholder="PDFについて質問してください"
            )
            submit_button = st.form_submit_button(label='送信')

            if submit_button and user_input:
                output = conversational_chat(user_input, chain)
                st.session_state['past'].append(user_input)
                st.session_state['generated'].append(output)

        # Display chat history (past/generated are kept index-aligned).
        if st.session_state['generated']:
            with response_container:
                for i in range(len(st.session_state['generated'])):
                    message(
                        st.session_state["past"][i], 
                        is_user=True, 
                        key=str(i) + '_user', 
                        avatar_style="big-smile"
                    )
                    message(
                        st.session_state["generated"][i], 
                        key=str(i), 
                        avatar_style="thumbs"
                    )
    else:
        st.info("PDFファイルをアップロードしてチャットを開始してください")

# Entry point when executed directly (e.g. `streamlit run <this file>`).
if __name__ == "__main__":
    main()