File size: 9,989 Bytes
05269f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5d66604
05269f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
import gradio as gr
import asyncio
from rag.pipeline.language_model import LM, LMConfig
from rag.retriever.langchain_retriever import LangChainRetriever
from rag.inference.inferencer import InferencerConfig, Inferencer
from rag import cs_agent, query_maker_agent
def test_inference():
    """Build and launch the Gradio chat UI for the RAG customer-service agent.

    Defines the streaming chat handler, the document-upload handler and the
    Gradio layout with its event wiring, then calls ``demo.launch`` on port
    7861. Relies on the module-level ``gr``, ``asyncio`` and ``cs_agent``
    imports.
    """

    print("==== Start Inference Test ===")

    # NOTE(review): no component is actually initialized in this function;
    # the message below is printed unconditionally.
    print("RAG system initialized successfully!")

    def chatbot_response(message, history):
        """Yield the progressively growing answer text for ``message``.

        This is a synchronous generator that bridges to the asynchronous
        ``cs_agent.get_result`` stream by driving a private event loop one
        item at a time. ``history`` is accepted for the caller's convenience
        but is not used. Any exception is reported as a single error string.
        """
        try:
            # The handler is synchronous, so it owns a dedicated event loop.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

            async def stream_response():
                partial_response = ""
                # NOTE(review): query rewriting through query_maker_agent is
                # currently disabled; the raw user message is sent as-is.
                # NOTE(review): documents are loaded on every message —
                # confirm load_documents() is cheap or idempotent.
                await cs_agent.load_documents()
                async for stream_data in cs_agent.get_result(question=message):
                    event_type = stream_data["type"]

                    if event_type == "chunk":
                        partial_response += stream_data["data"]["chunk"]
                        yield partial_response

                    elif event_type == "metadata":
                        setup_time = stream_data['data']['setup_time']
                        print(f"\nSetup completed in {setup_time:.2f}s")

                    elif event_type == "complete":
                        total_time = stream_data['data']['total_time']
                        print(f"\nTotal time: {total_time:.2f}s")

            async_gen = stream_response()

            try:
                while True:
                    try:
                        partial = loop.run_until_complete(async_gen.__anext__())
                    except StopAsyncIteration:
                        break
                    yield partial
            finally:
                # Reached on normal completion, on error, and when the
                # consumer stops early (e.g. the Stop button cancels the
                # event). Finalize the async generator so the agent stream's
                # own cleanup runs before the loop goes away.
                try:
                    loop.run_until_complete(async_gen.aclose())
                    loop.run_until_complete(loop.shutdown_asyncgens())
                except Exception as cleanup_error:
                    # Cleanup is best-effort; never mask the real outcome.
                    print(f"Stream cleanup failed: {cleanup_error}")
                finally:
                    loop.close()

        except Exception as e:
            yield f"❌ Error: {str(e)}"

    def add_document_to_vectorstore(file_path):
        """Handle the upload button and return a one-line status message.

        ``file_path`` is the path supplied by the ``gr.File`` component, or a
        falsy value when nothing was selected.
        """
        if not file_path:
            return "⚠️ Please select a file first."

        async def add_doc():
            # TODO: ingestion is not wired up yet. Once implemented this
            # should return an object exposing ``success``,
            # ``document_metadata`` and ``error_message``.
            return None

        try:
            # asyncio.run creates, runs and always closes a fresh loop.
            result = asyncio.run(add_doc())

            if not hasattr(result, "success"):
                # Placeholder result: report it plainly instead of failing
                # with an attribute error.
                return "⚠️ Document ingestion is not implemented yet."

            if result.success:
                return f"✅ Successfully added: {result.document_metadata.file_name} ({result.document_metadata.chunk_count} chunks)"
            else:
                return f"❌ Failed to add document: {result.error_message}"

        except Exception as e:
            return f"❌ Error adding document: {str(e)}"

    def clear_chat():
        """Return an empty chat history and an empty input box value."""
        return [], ""

    # CSS used to style the page
    css = """
    .gradio-container {
        max-width: 900px !important;
        margin: auto !important;
    }
    .chat-message {
        padding: 10px;
        margin: 5px;
        border-radius: 10px;
    }
    #chatbot {
        height: 500px;
    }
    """

    # Build the Gradio interface
    with gr.Blocks(css=css, title="RAG Chatbot") as demo:
        gr.Markdown("""
        # 🤖 SakuraAI, Virtual Assistant 
        """)

        # Status indicator
        with gr.Row():
            status_text = gr.Textbox(
                value="✅ RAG System Ready",
                label="System Status",
                interactive=False,
                container=True
            )

        with gr.Row():
            with gr.Column(scale=2):
                chatbot = gr.Chatbot(
                    elem_id="chatbot",
                    show_label=False,
                    container=True,
                    bubble_full_width=False,
                    show_copy_button=True,
                    layout="panel"
                )

                with gr.Row():
                    msg = gr.Textbox(
                        placeholder="Tanyakan sesuatu tentang dokumen Anda...",
                        show_label=False,
                        scale=4,
                        container=False,
                        lines=1,
                        max_lines=3,
                        autofocus=True
                    )
                    send_btn = gr.Button("Kirim", variant="primary", scale=1)

                with gr.Row():
                    clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")
                    stop_btn = gr.Button("⏹️ Stop", variant="stop", visible=False)

            # Document management panel
            with gr.Column(scale=1):
                gr.Markdown("### 📚 Document Management")

                with gr.Group():
                    file_upload = gr.File(
                        label="Upload Document",
                        file_types=[".pdf", ".txt", ".docx"],
                        type="filepath"
                    )
                    upload_btn = gr.Button("Add to Knowledge Base", variant="secondary")
                    upload_status = gr.Textbox(
                        label="Upload Status",
                        interactive=False,
                        lines=3
                    )

                gr.Markdown("""
                ### ⚙️ RAG Settings
                - **K**: 3 (documents retrieved)
                - **Template**: Friendly
                - **Reranking**: Disabled
                - **Vectorstore**: ChromaDB
                """)

        # Per-session flag: True while an answer is being streamed
        is_generating = gr.State(False)

        # Chat event handlers
        def user_message(message, history, generating):
            """Append the user's message to the history if it can be sent.

            Returns updates for (input box, chat history, generating flag,
            stop button, send button). A blank message, or one submitted
            while an answer is still streaming, is rejected and leaves the
            buttons matching the current ``generating`` state.
            """
            if message.strip() and not generating:
                history.append([message, None])
                return "", history, True, gr.update(visible=True), gr.update(interactive=False)
            # Rejected: keep the buttons consistent with whether a
            # generation is still in flight.
            return (
                message,
                history,
                generating,
                gr.update(visible=generating),
                gr.update(interactive=not generating),
            )

        def bot_message_stream(history, generating):
            """Stream the bot's answer into the last history entry.

            Yields updates for (chat history, generating flag, stop button,
            send button). Only runs when the last entry is still waiting for
            an answer and ``generating`` is set.
            """
            if history and history[-1][1] is None and generating:
                user_msg = history[-1][0]

                for partial_response in chatbot_response(user_msg, history):
                    history[-1][1] = partial_response
                    yield history, True, gr.update(visible=True), gr.update(interactive=False)

                # Stream finished: return the controls to the idle state.
                yield history, False, gr.update(visible=False), gr.update(interactive=True)
            else:
                # Nothing to answer; mirror the current state rather than
                # forcing the idle look while another stream may be running.
                yield (
                    history,
                    generating,
                    gr.update(visible=generating),
                    gr.update(interactive=not generating),
                )

        def stop_generation():
            """Reset the generating flag and restore the idle button state."""
            return False, gr.update(visible=False), gr.update(interactive=True)

        # Bind the Enter-key submit of the message box
        submit_event = msg.submit(
            user_message,
            inputs=[msg, chatbot, is_generating],
            outputs=[msg, chatbot, is_generating, stop_btn, send_btn]
        ).then(
            bot_message_stream,
            inputs=[chatbot, is_generating],
            outputs=[chatbot, is_generating, stop_btn, send_btn]
        )

        # Bind the send button to the same two-step chain
        send_event = send_btn.click(
            user_message,
            inputs=[msg, chatbot, is_generating],
            outputs=[msg, chatbot, is_generating, stop_btn, send_btn]
        ).then(
            bot_message_stream,
            inputs=[chatbot, is_generating],
            outputs=[chatbot, is_generating, stop_btn, send_btn]
        )

        # Clear chat event
        clear_btn.click(
            clear_chat,
            outputs=[chatbot, msg]
        ).then(
            lambda: (False, gr.update(visible=False), gr.update(interactive=True)),
            outputs=[is_generating, stop_btn, send_btn]
        )

        # Stop generation event: cancels whichever chain is streaming
        stop_btn.click(
            stop_generation,
            outputs=[is_generating, stop_btn, send_btn],
            cancels=[submit_event, send_event]
        )

        # Document upload event
        upload_btn.click(
            add_document_to_vectorstore,
            inputs=[file_upload],
            outputs=[upload_status]
        )

        # Info panel
        with gr.Accordion("ℹ️ Info Penggunaan", open=False):
            gr.Markdown("""
            ### Cara Menggunakan:
            1. **Chat**: Ketik pertanyaan tentang dokumen yang sudah dimuat
            2. **Upload**: Tambahkan dokumen baru ke knowledge base
            3. **Stream**: Response akan muncul secara streaming
            4. **Stop**: Gunakan tombol stop untuk menghentikan generasi
            
            ### Dokumen yang Dimuat:
            - file2.pdf (dari folder documents)
            - Dokumen tambahan yang Anda upload
            
            ### Teknologi yang Digunakan:
            - **LLM**: Qwen dengan streaming
            - **Embedding**: text-embedding-3-small
            - **Vectorstore**: ChromaDB
            - **Search**: Hybrid search (dense + sparse)
            """)

    # Launch the interface
    # NOTE(review): server_name="0.0.0.0" listens on all network interfaces.
    print("Launching Gradio interface...")
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7861,
        show_error=True,
        show_api=False
    )