Bryceeee committed on
Commit
2185b3f
·
verified ·
1 Parent(s): 8fbc2e2

Update app.py

Browse files

after app_minimal

Files changed (1) hide show
  1. app.py +321 -97
app.py CHANGED
@@ -1,126 +1,350 @@
1
  """
2
- Minimal version of app.py for Hugging Face Spaces
3
- Use this for testing if the full version doesn't work
4
  """
5
  import os
6
- import gradio as gr
 
 
 
 
 
 
 
7
  from openai import OpenAI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- # Simple configuration
10
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
11
- VECTOR_STORE_ID = os.getenv("VECTOR_STORE_ID", "vs_69022a9edd74819199bf9a34a83e877b")
12
- MODEL = "gpt-4o-mini"
 
 
 
 
 
 
 
13
 
14
 
15
- def query_rag(question: str) -> str:
16
- """Simple RAG query function"""
17
- if not OPENAI_API_KEY:
18
- return "❌ Error: OPENAI_API_KEY not set in Spaces Secrets"
 
 
 
 
 
19
 
20
- if not VECTOR_STORE_ID:
21
- return "❌ Error: VECTOR_STORE_ID not set"
 
22
 
23
  try:
24
- client = OpenAI(api_key=OPENAI_API_KEY)
25
-
26
- # Create assistant with file search
27
- assistant = client.beta.assistants.create(
28
- name="Car Manual Assistant",
29
- instructions="You are a helpful assistant that answers questions about car manuals.",
30
- model=MODEL,
31
- tools=[{"type": "file_search"}],
32
- tool_resources={"file_search": {"vector_store_ids": [VECTOR_STORE_ID]}}
 
 
 
 
 
 
 
33
  )
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- # Create thread and run
36
- thread = client.beta.threads.create()
37
 
38
- message = client.beta.threads.messages.create(
39
- thread_id=thread.id,
40
- role="user",
41
- content=question
42
- )
43
 
44
- run = client.beta.threads.runs.create_and_poll(
45
- thread_id=thread.id,
46
- assistant_id=assistant.id
47
- )
48
 
49
- if run.status == 'completed':
50
- messages = client.beta.threads.messages.list(thread_id=thread.id)
51
- response = messages.data[0].content[0].text.value
52
-
53
- # Cleanup
54
- client.beta.assistants.delete(assistant.id)
55
-
56
- return response
57
- else:
58
- return f"❌ Query failed with status: {run.status}"
59
-
60
- except Exception as e:
61
- return f"❌ Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
 
64
- # Create Gradio interface
65
- def create_interface():
66
- """Create minimal Gradio interface"""
 
 
67
 
68
- with gr.Blocks(title="CSRC Car Manual RAG") as demo:
69
- gr.Markdown("""
70
- # πŸš— CSRC Car Manual RAG System
71
- ## Minimal Version for Testing
 
 
 
 
 
 
 
 
 
72
 
73
- This is a simplified version for testing Hugging Face Spaces deployment.
74
- """)
 
75
 
76
- with gr.Row():
77
- with gr.Column():
78
- question_input = gr.Textbox(
79
- label="Your Question",
80
- placeholder="Ask anything about the car manual...",
81
- lines=3
82
- )
83
- submit_btn = gr.Button("Submit", variant="primary")
84
-
85
- with gr.Column():
86
- answer_output = gr.Textbox(
87
- label="Answer",
88
- lines=10
89
- )
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
- gr.Examples(
92
- examples=[
93
- ["What are the main features of the adaptive cruise control?"],
94
- ["How do I use the parking assist system?"],
95
- ["What should I check during regular maintenance?"]
96
- ],
97
- inputs=question_input
 
 
 
 
98
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
- submit_btn.click(
101
- fn=query_rag,
102
- inputs=question_input,
103
- outputs=answer_output
 
 
 
 
 
 
 
 
 
 
104
  )
 
 
 
 
105
 
106
- gr.Markdown("""
107
- ---
108
- ### Status
109
- - βœ… Using vector store: `{}`
110
- - πŸ”‘ API Key: {}
111
- """.format(
112
- VECTOR_STORE_ID,
113
- "βœ… Set" if OPENAI_API_KEY else "❌ Not Set"
114
- ))
115
-
116
- return demo
 
 
 
 
 
 
 
 
 
 
 
117
 
 
 
 
 
 
 
 
 
 
 
118
 
119
- # Create and launch
120
- print("πŸ”„ Creating minimal app...")
121
- demo = create_interface()
122
- print("βœ… App created!")
123
 
 
124
  if __name__ == "__main__":
125
- print("πŸš€ Launching app...")
126
- demo.launch()
 
 
 
 
1
  """
2
+ Hugging Face Spaces Entry Point for CSRC Car Manual RAG System
3
+ Optimized version with timeout protection and lazy loading
4
  """
5
  import os
6
+ import sys
7
+ from pathlib import Path
8
+ import signal
9
+ from contextlib import contextmanager
10
+
11
+ # Add the current directory to Python path for Spaces environment
12
+ sys.path.insert(0, str(Path(__file__).parent))
13
+
14
  from openai import OpenAI
15
+ from src.config import Config
16
+ from src.vector_store import VectorStoreManager
17
+ from src.rag_query import RAGQueryEngine
18
+ from src.question_generator import QuestionGenerator
19
+ from src.knowledge_graph import KnowledgeGraphGenerator
20
+ from src.gradio_interface import GradioInterfaceBuilder
21
+
22
+ # Import personalized learning if available
23
+ try:
24
+ from modules.personalized_learning import UserProfilingSystem, LearningPathGenerator, AdaptiveLearningEngine
25
+ PERSONALIZED_LEARNING_AVAILABLE = True
26
+ except ImportError:
27
+ PERSONALIZED_LEARNING_AVAILABLE = False
28
+ print("⚠️ Personalized learning modules not available")
29
+
30
+ # Import proactive learning if available
31
+ try:
32
+ from modules.proactive_learning import ProactiveLearningEngine
33
+ PROACTIVE_LEARNING_AVAILABLE = True
34
+ except ImportError:
35
+ PROACTIVE_LEARNING_AVAILABLE = False
36
+ print("⚠️ Proactive learning modules not available")
37
 
38
+ # Import scenario contextualization if available
39
+ try:
40
+ from modules.scenario_contextualization.database.scenario_database import ScenarioDatabase
41
+ from modules.scenario_contextualization.integration.feature_extractor import ADASFeatureExtractor
42
+ from modules.scenario_contextualization.retrieval.scenario_retriever import ScenarioRetriever
43
+ from modules.scenario_contextualization.formatting.constructive_formatter import ConstructiveFormatter
44
+ from modules.scenario_contextualization.integration.enhanced_rag_engine import EnhancedRAGEngine
45
+ SCENARIO_CONTEXTUALIZATION_AVAILABLE = True
46
+ except ImportError as e:
47
+ SCENARIO_CONTEXTUALIZATION_AVAILABLE = False
48
+ print(f"⚠️ Scenario contextualization modules not available: {e}")
49
 
50
 
51
class TimeoutError(Exception):
    """Raised by the ``timeout`` context manager when its time limit expires.

    NOTE: within this module the name shadows Python's built-in
    ``TimeoutError`` (an ``OSError`` subclass). The ``except TimeoutError``
    clauses in this file therefore catch this class, not the built-in.
    """
53
+
54
+
55
@contextmanager
def timeout(seconds):
    """Abort the enclosed block with ``TimeoutError`` after ``seconds`` seconds.

    Implemented with ``signal.alarm`` / ``SIGALRM``, so it only works on Unix
    and only when entered from the main thread (``signal.signal`` raises
    ``ValueError`` elsewhere).

    Nesting is supported. A process has a single alarm timer, so arming an
    inner timeout replaces the enclosing one; on exit the enclosing handler
    is restored and its alarm is re-armed with the time it has left. Without
    the re-arm, leaving any inner ``timeout`` would silently cancel the outer
    one.

    Args:
        seconds: Whole number of seconds to allow. ``0`` schedules no alarm,
            i.e. no limit is enforced.

    Raises:
        TimeoutError: Inside the ``with`` body, once the limit expires.
    """
    import math
    import time

    def timeout_handler(signum, frame):
        raise TimeoutError(f"Operation timed out after {seconds} seconds")

    # Install our handler, remembering whatever was there before.
    old_handler = signal.signal(signal.SIGALRM, timeout_handler)
    # signal.alarm() returns the seconds left on a previously scheduled alarm
    # (0 if there was none) -- that is the enclosing timeout's budget.
    outer_remaining = signal.alarm(seconds)
    started = time.monotonic()

    try:
        yield
    finally:
        # Cancel our alarm first so it cannot fire into the restored handler.
        signal.alarm(0)
        signal.signal(signal.SIGALRM, old_handler)
        if outer_remaining:
            elapsed = time.monotonic() - started
            # Re-arm the enclosing alarm. If its deadline already passed while
            # we held the timer, fire it as soon as possible (alarm(0) would
            # cancel it instead, hence the floor of 1).
            signal.alarm(max(1, math.ceil(outer_remaining - elapsed)))
71
+
72
+
73
def initialize_system(config: Config) -> dict:
    """Build every component of the RAG system and return them in a dict.

    The core components (OpenAI client, vector store, RAG engine, question
    generator, knowledge graph) are mandatory: a failure there propagates to
    the caller. The optional feature sets (personalized learning, proactive
    learning, scenario contextualization) are best-effort: each one is
    attempted under its own timeout and, on timeout or error, is logged and
    left as ``None``.

    Args:
        config: Application configuration. Read for the API key, the vector
            store id/name, the PDF files, the model, the output directory and
            the available topics.

    Returns:
        A dict with the keys ``client``, ``vector_store_manager``,
        ``rag_engine``, ``question_generator``, ``knowledge_graph``,
        ``user_profiling``, ``learning_path_generator``, ``adaptive_engine``,
        ``proactive_engine``, ``enhanced_rag_engine`` and ``config``. The
        optional entries are ``None`` when the feature is unavailable or
        failed to initialize.

    Raises:
        ValueError: If no OpenAI API key is configured, or a vector store has
            to be created but no PDF files are found.
        RuntimeError: If the vector store cannot be created or no file could
            be uploaded to it.
    """
    print("🔧 Initializing core components...")

    if not config.openai_api_key:
        raise ValueError(
            "OPENAI_API_KEY not found! Please set it in Hugging Face Spaces Secrets. "
            "Go to Settings > Secrets and add OPENAI_API_KEY"
        )

    client = OpenAI(api_key=config.openai_api_key)
    vector_store_manager = VectorStoreManager(client)

    # Reuse the configured vector store, or create and populate a new one.
    vector_store_id = config.get_vector_store_id()
    if not vector_store_id:
        print("📦 Creating new vector store...")
        pdf_files = config.get_pdf_files()
        if not pdf_files:
            raise ValueError(f"No PDF files found in {config.car_manual_dir}")

        vector_store_details = vector_store_manager.create_vector_store(config.vector_store_name)
        if not vector_store_details:
            raise RuntimeError("Failed to create vector store")

        vector_store_id = vector_store_details["id"]
        # NOTE(review): the id is persisted before the upload is known to have
        # succeeded; if every upload fails, a later start may pick up an empty
        # store via get_vector_store_id(). Confirm this is intended.
        config.save_vector_store_id(vector_store_id, config.vector_store_name)

        upload_stats = vector_store_manager.upload_pdf_files(pdf_files, vector_store_id)
        if upload_stats["successful_uploads"] == 0:
            raise RuntimeError("Failed to upload any files")
    else:
        print(f"✅ Using existing vector store: {vector_store_id}")

    print("🔧 Initializing RAG engine...")
    rag_engine = RAGQueryEngine(client, vector_store_id, config.model)

    print("🔧 Initializing question generator...")
    question_generator = QuestionGenerator(client, rag_engine)

    print("🔧 Initializing knowledge graph...")
    knowledge_graph = KnowledgeGraphGenerator(client, vector_store_id, str(config.output_dir))

    # --- Optional: personalized learning -----------------------------------
    user_profiling = None
    learning_path_generator = None
    adaptive_engine = None

    if PERSONALIZED_LEARNING_AVAILABLE:
        try:
            print("🔧 Initializing personalized learning...")
            with timeout(10):
                profiling = UserProfilingSystem()
                path_generator = LearningPathGenerator(profiling, config.available_topics)
                engine = AdaptiveLearningEngine(profiling, path_generator)
            # Publish the three objects together only once all of them exist,
            # so a timeout/error midway never leaves a half-built set (e.g. a
            # profiling system without its adaptive engine) for later steps.
            user_profiling = profiling
            learning_path_generator = path_generator
            adaptive_engine = engine
            print("✅ Personalized Learning System initialized!")
        except TimeoutError:
            print("⚠️ Personalized Learning System initialization timed out - skipping")
        except Exception as e:
            print(f"⚠️ Error initializing Personalized Learning System: {e}")

    # --- Optional: proactive learning (needs personalized learning) --------
    proactive_engine = None
    if PROACTIVE_LEARNING_AVAILABLE and user_profiling:
        try:
            print("🔧 Initializing proactive learning...")
            with timeout(10):
                proactive_engine = ProactiveLearningEngine(
                    client, rag_engine, user_profiling, adaptive_engine, config.available_topics
                )
            print("✅ Proactive Learning Assistance initialized!")
        except TimeoutError:
            print("⚠️ Proactive Learning Assistance initialization timed out - skipping")
        except Exception as e:
            print(f"⚠️ Error initializing Proactive Learning Assistance: {e}")

    # --- Optional: scenario contextualization ------------------------------
    enhanced_rag_engine = None
    if SCENARIO_CONTEXTUALIZATION_AVAILABLE:
        try:
            print("🔧 Initializing scenario contextualization...")
            with timeout(15):
                scenario_database = ScenarioDatabase()
                feature_extractor = ADASFeatureExtractor(use_llm=False, client=client)
                scenario_retriever = ScenarioRetriever(
                    scenario_database=scenario_database,
                    scenario_vector_store_id=None,
                    client=client,
                )
                formatter = ConstructiveFormatter()
                enhanced_rag_engine = EnhancedRAGEngine(
                    base_rag_engine=rag_engine,
                    scenario_retriever=scenario_retriever,
                    feature_extractor=feature_extractor,
                    formatter=formatter,
                )
            print("✅ Scenario Contextualization initialized!")
        except TimeoutError:
            print("⚠️ Scenario Contextualization initialization timed out - skipping")
        except Exception as e:
            print(f"⚠️ Error initializing Scenario Contextualization: {e}")

    print("✅ Core system initialized!")
    return {
        "client": client,
        "vector_store_manager": vector_store_manager,
        "rag_engine": rag_engine,
        "question_generator": question_generator,
        "knowledge_graph": knowledge_graph,
        "user_profiling": user_profiling,
        "learning_path_generator": learning_path_generator,
        "adaptive_engine": adaptive_engine,
        "proactive_engine": proactive_engine,
        "enhanced_rag_engine": enhanced_rag_engine,
        "config": config,
    }
198
 
199
 
200
_INIT_TIMEOUT_MESSAGE = """\
# ❌ Initialization Timeout

The system took too long to initialize. This usually happens when:
1. Vector store creation is slow
2. Too many modules are being loaded at startup

**Suggested solutions:**
1. Reduce the number of modules loaded at startup
2. Use a smaller vector store
3. Implement lazy loading for optional features
"""

_INTERFACE_TIMEOUT_MESSAGE = """\
# ❌ Interface Creation Timeout

The Gradio interface took too long to create. This usually happens when:
1. Too many components are being created
2. Complex initialization in component callbacks

**Suggested solutions:**
1. Simplify the interface
2. Use lazy loading for complex components
3. Move initialization logic out of interface creation
"""

_INIT_ERROR_SOLUTIONS = """\
**Possible solutions:**
1. Check if OPENAI_API_KEY is set in Spaces Secrets (Settings > Secrets)
2. Ensure PDF files are in the `car_manual/` directory
3. Check the logs for more details"""


def _failure_markdown(heading, error, solutions=""):
    """Render ``error`` and the active traceback as a Markdown error page.

    Must be called from inside an ``except`` block so that
    ``traceback.format_exc()`` sees the exception being handled.
    """
    import traceback

    sections = [f"# ❌ {heading}", f"**Error:** {str(error)}"]
    if solutions:
        sections.append(solutions)
    sections.append(f"**Traceback:**\n```\n{traceback.format_exc()}\n```")
    return "\n\n".join(sections) + "\n"


def _error_interface(markdown_text):
    """Return a minimal Gradio app that displays ``markdown_text`` on load."""
    # Imported lazily: this module only needs gradio directly on failure paths.
    import gradio as gr

    # A Blocks page with a static Markdown component shows the message
    # immediately; an output-only gr.Interface would render nothing until the
    # visitor clicks its generate button.
    with gr.Blocks(title="CSRC Car Manual RAG System") as error_app:
        gr.Markdown(markdown_text)
    return error_app


def create_app():
    """Create and return the Gradio app for Hugging Face Spaces.

    Runs in two guarded stages: system initialization (90-second limit) and
    interface construction (30-second limit). If either stage times out or
    raises, the problem is logged and a small Gradio page describing it is
    returned instead, so the caller always gets something it can launch.

    Returns:
        The interface built by ``GradioInterfaceBuilder.create_interface()``
        on success, otherwise an error page built by ``_error_interface``.
    """
    import traceback

    print("=" * 60)
    print("🚗 CSRC Car Manual RAG System - Hugging Face Spaces")
    print("=" * 60)

    config = Config()

    # Stage 1: core + optional components.
    try:
        print("⏱️ Starting initialization (max 90 seconds)...")
        with timeout(90):
            components = initialize_system(config)
    except TimeoutError:
        print("❌ System initialization timed out!")
        return _error_interface(_INIT_TIMEOUT_MESSAGE)
    except Exception as e:
        print(f"❌ Error initializing system: {e}")
        traceback.print_exc()
        return _error_interface(
            _failure_markdown("Initialization Error", e, _INIT_ERROR_SOLUTIONS)
        )

    # Stage 2: the Gradio UI.
    print("\n🌐 Building Gradio interface...")
    try:
        print("⏱️ Creating interface (max 30 seconds)...")
        with timeout(30):
            # NOTE(review): components["enhanced_rag_engine"] is built by
            # initialize_system but not handed to the builder here -- confirm
            # whether GradioInterfaceBuilder is meant to receive it.
            interface_builder = GradioInterfaceBuilder(
                rag_engine=components["rag_engine"],
                question_generator=components["question_generator"],
                knowledge_graph=components["knowledge_graph"],
                config=components["config"],
                user_profiling=components["user_profiling"],
                adaptive_engine=components["adaptive_engine"],
                proactive_engine=components["proactive_engine"],
            )

            print("📦 Creating interface components...")
            demo = interface_builder.create_interface()
        print("✅ Gradio interface created successfully!")
        return demo
    except TimeoutError:
        print("❌ Gradio interface creation timed out!")
        return _error_interface(_INTERFACE_TIMEOUT_MESSAGE)
    except Exception as e:
        print(f"❌ Error building Gradio interface: {e}")
        traceback.print_exc()
        return _error_interface(_failure_markdown("Interface Building Error", e))
327
+
328
+
329
# Cached app so that repeated get_app() calls do not re-run initialization.
_app_instance = None


def get_app():
    """Return the app, creating it with ``create_app()`` on first use.

    The result is stored in the module-level ``_app_instance`` and reused by
    later calls. If ``create_app()`` raises, nothing is cached and the next
    call tries again.
    """
    global _app_instance
    if _app_instance is None:
        print("🔄 Creating new app instance...")
        _app_instance = create_app()
        print("✅ App instance created!")
    else:
        print("♻️ Reusing existing app instance")
    return _app_instance
342
 
 
 
 
 
343
 
344
# For Hugging Face Spaces: ``demo`` is bound at module level in every case so
# Spaces can auto-detect it on import; the server is started explicitly only
# when this file is executed as a script.
demo = get_app()

if __name__ == "__main__":
    demo.launch()