Pranesh64 committed on
Commit
9efd5cd
Β·
verified Β·
1 Parent(s): b999654

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +247 -0
app.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Gradio app for Multi-Document RAG Assistant
(Auto-loads documents from data/ directory)
"""

import gradio as gr

from backend.processing import process_documents_from_directory, get_available_files
from backend.rag import RAGEngine
from backend.llm import LLMClient

# -------------------------------
# Global state
# -------------------------------
# One shared retrieval engine and one shared LLM client, reused by every
# request handler in this module.
rag_engine = RAGEngine()
llm_client = LLMClient()
17
# -------------------------------
# Auto-initialize on startup
# -------------------------------
def initialize_system():
    """Load and index the documents found in the data/ directory.

    Returns:
        tuple: ``(status_message, file_names)`` where ``status_message`` is a
        human-readable string and ``file_names`` is the list of files found
        under ``data/`` (empty list on failure).
    """
    try:
        files = get_available_files("data")
        if not files:
            return "⚠️ No documents found in data/ directory. Please add PDF, TXT, or MD files to the data folder.", []

        print(f"📁 Found {len(files)} files: {files}")

        # Reuse a previously built index instead of re-processing everything.
        existing = rag_engine.get_chunk_count()
        if existing > 0:
            return f"✅ Using existing index with {existing} chunks", files

        # No index yet: extract chunks from every document and index them.
        chunks = process_documents_from_directory("data")
        if not chunks:
            return "⚠️ No valid content extracted from documents", files

        rag_engine.add_documents(chunks)
        return f"✅ Ready! Indexed {len(chunks)} chunks from {len(files)} documents.", files
    except Exception as e:
        # Boundary handler: surface the failure in the UI status string.
        return f"❌ Error initializing system: {str(e)}", []
42
+
43
# Build (or reuse) the index once at import time so the UI starts ready;
# loaded_files is referenced later when composing the welcome message.
system_status, loaded_files = initialize_system()
print(f"System Status: {system_status}")
46
+
47
# -------------------------------
# Rebuild index function
# -------------------------------
def rebuild_index():
    """Drop and rebuild the vector index from the data/ directory.

    Returns:
        tuple: ``(status_message, chunk_count, file_names)`` — a Markdown
        status string, the number of chunks indexed, and the list of files
        discovered under ``data/``.
    """
    try:
        count = rag_engine.rebuild_from_data("data")
        files = get_available_files("data")
        status = (
            f"✅ Rebuilt index with {count} chunks from {len(files)} files"
            if count > 0
            else "⚠️ No documents found to index"
        )
        return status, count, files
    except Exception as e:
        # Boundary handler: report the failure instead of crashing the UI.
        return f"❌ Error rebuilding index: {str(e)}", 0, []
62
+
63
# -------------------------------
# Search & generate answer
# -------------------------------
def _append_exchange(history, question, reply):
    """Append one user/assistant turn to the chat history and return it."""
    history.append({"role": "user", "content": question})
    history.append({"role": "assistant", "content": reply})
    return history


def search_and_answer(question, top_k, history):
    """Retrieve relevant chunks for *question* and generate an answer.

    Args:
        question: Raw user input from the textbox.
        top_k: Number of chunks to retrieve (value of the UI slider).
        history: Chat history in Gradio "messages" format (list of
            ``{"role": ..., "content": ...}`` dicts), or None.

    Returns:
        tuple: ``(history, "")`` — the updated chat history and an empty
        string that clears the input textbox.
    """
    # Gradio can hand us None for an untouched Chatbot component.
    history = history or []

    if not question.strip():
        return history, ""

    if rag_engine.get_chunk_count() == 0:
        return _append_exchange(
            history,
            question,
            "⚠️ No documents loaded. Please add PDF, TXT, or MD files to the 'data/' directory and click 'Rebuild Index'.",
        ), ""

    try:
        # Sliders may deliver floats; the retriever expects an integer count.
        results = rag_engine.search(question, top_k=int(top_k))

        if not results:
            return _append_exchange(
                history,
                question,
                "⚠️ No relevant information found in the documents for this question.",
            ), ""

        # Generate the final answer from the retrieved context.
        answer = llm_client.generate_answer(question, results)
        return _append_exchange(history, question, answer), ""

    except Exception as e:
        # Boundary handler: show the error in the chat instead of crashing.
        return _append_exchange(history, question, f"❌ Error processing question: {str(e)}"), ""
100
+
101
def get_system_info():
    """Render the current system state as a Markdown summary string."""
    files = get_available_files("data")
    chunk_count = rag_engine.get_chunk_count()

    # Pre-compute the pieces so the template below stays readable.
    file_lines = "\n".join(f"• {name}" for name in files) if files else "• None"
    llm_status = (
        "✅ Azure OpenAI configured"
        if llm_client.has_token()
        else "⚠️ No Azure OpenAI token (using extractive fallback)"
    )

    return f"""
**📊 System Status**

**📁 Documents in data/ folder:** {len(files)}
{file_lines}

**🧠 Chunks Indexed:** {chunk_count}

**🤖 LLM Status:** {llm_status}

**💡 Usage:** Ask questions about the content in your documents. The system searches through all indexed chunks to provide relevant answers.
"""
119
+
120
# -------------------------------
# UI - Clean Chat Interface
# -------------------------------
with gr.Blocks(
    title="AI Document Assistant",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        max-width: 1200px !important;
        margin: auto;
    }
    """
) as demo:

    # Header
    gr.Markdown("""
    # 🤖 AI Document Assistant

    Ask questions about your documents. The system automatically loads all documents from the `data/` directory.
    """)

    # System info and controls
    with gr.Accordion("📊 System Information & Controls", open=False):
        system_info = gr.Markdown(get_system_info())

        with gr.Row():
            refresh_info_btn = gr.Button("🔄 Refresh Info", variant="secondary")
            rebuild_btn = gr.Button("🔨 Rebuild Index", variant="secondary")

        rebuild_status = gr.Markdown()

    # Main chat interface
    chatbot = gr.Chatbot(
        type="messages",
        height=500,
        show_label=False,
        container=True,
        show_copy_button=True
    )

    # Input area
    with gr.Row():
        question = gr.Textbox(
            placeholder="Ask a question about your documents...",
            label="Your Question",
            scale=4,
            lines=1,
            max_lines=3
        )

        submit_btn = gr.Button("💬 Send", variant="primary", scale=1)

    # Advanced options
    with gr.Accordion("⚙️ Advanced Settings", open=False):
        top_k = gr.Slider(
            minimum=1,
            maximum=10,
            value=5,
            step=1,
            label="Number of document chunks to retrieve",
            info="Higher values provide more context but may include less relevant information"
        )

        clear_btn = gr.Button("🗑️ Clear Chat History", variant="secondary")

    # -------------------------------
    # Event handlers
    # -------------------------------

    # Submit on button click
    submit_btn.click(
        search_and_answer,
        inputs=[question, top_k, chatbot],
        outputs=[chatbot, question]
    )

    # Submit on Enter key
    question.submit(
        search_and_answer,
        inputs=[question, top_k, chatbot],
        outputs=[chatbot, question]
    )

    # Clear chat history
    clear_btn.click(
        lambda: [],
        outputs=[chatbot]
    )

    # Refresh system info
    refresh_info_btn.click(
        get_system_info,
        outputs=[system_info]
    )

    # Rebuild index.
    # BUG FIX: rebuild_index() returns (status, chunk_count, files), but the
    # outputs were previously [rebuild_status, system_info, system_info] —
    # the same component listed twice, with a raw int and a raw list pushed
    # into Markdown components. Adapt the result so each output component
    # receives proper Markdown.
    def _rebuild_and_refresh():
        """Rebuild the index and return (status message, refreshed info)."""
        status, _chunk_count, _files = rebuild_index()
        return status, get_system_info()

    rebuild_btn.click(
        _rebuild_and_refresh,
        outputs=[rebuild_status, system_info]
    )

    # Show welcome message if system is ready
    if rag_engine.get_chunk_count() > 0:
        demo.load(
            lambda: [{
                "role": "assistant",
                "content": f"👋 **Welcome to AI Document Assistant!**\n\nI'm ready to help you with questions about your documents. I have access to **{rag_engine.get_chunk_count()} chunks** of information from **{len(loaded_files)} documents**:\n\n" +
                           "\n".join([f"📄 {file}" for file in loaded_files]) +
                           f"\n\n💡 **What would you like to know?** You can ask about specific topics, request summaries, or explore relationships between different documents."
            }],
            outputs=[chatbot]
        )
    else:
        demo.load(
            lambda: [{
                "role": "assistant",
                "content": "⚠️ **No documents loaded.**\n\nTo get started:\n1. Create a `data/` folder in your project directory\n2. Add PDF, TXT, or MD files to the folder\n3. Click '🔨 Rebuild Index' or restart the application\n\nI'll automatically load and index all your documents for instant searching!"
            }],
            outputs=[chatbot]
        )
240
+
241
# -------------------------------
# Launch
# -------------------------------
if __name__ == "__main__":
    # debug=True surfaces tracebacks in the console while developing.
    demo.launch(debug=True)