File size: 14,567 Bytes
46f2cb3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23f437b
46f2cb3
23f437b
46f2cb3
23f437b
46f2cb3
 
 
 
 
 
 
 
23f437b
46f2cb3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89b6166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46f2cb3
 
 
 
 
 
 
23f437b
46f2cb3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
#!/usr/bin/env python3
"""
Web interface for querying the product design document
"""

import sys
import os
from pathlib import Path

# Make packages from a project-local venv (a "venv" directory next to this
# script) importable even when the script is started with another interpreter.
script_dir = Path(__file__).parent
venv_python = script_dir / "venv" / "bin" / "python3"
if venv_python.exists():
    lib_dir = script_dir / "venv" / "lib"
    # Prefer the site-packages directory that matches the running interpreter:
    # compiled extensions built for another minor version may fail to import.
    venv_site_packages = (
        lib_dir
        / f"python{sys.version_info.major}.{sys.version_info.minor}"
        / "site-packages"
    )
    if not venv_site_packages.exists() and lib_dir.is_dir():
        # Otherwise fall back to whichever python* directory the venv has;
        # sorting keeps the choice deterministic across runs.
        candidates = sorted(lib_dir.glob("python*/site-packages"))
        if candidates:
            venv_site_packages = candidates[0]

    if venv_site_packages.exists():
        # Front of sys.path so the venv's packages win over system ones.
        sys.path.insert(0, str(venv_site_packages))

# Flask is the one hard dependency of this script: print installation hints
# and exit with status 1 instead of failing with a bare traceback.
try:
    from flask import Flask, render_template, request, jsonify
except ImportError:
    print("❌ Flask not installed!")
    print("\n💡 Solutions:")
    print("\n1. Activate venv and install:")
    print("   source venv/bin/activate")
    print("   pip install flask flask-cors")
    print("\n2. Or install in system Python:")
    print("   pip3 install flask flask-cors")
    print("\n3. Or use the helper script:")
    print("   ./scripts/setup/start_web.sh")
    sys.exit(1)

import subprocess
import json
import re
import socket
import time

app = Flask(__name__, template_folder='templates', static_folder='static')


# Allow CORS if needed
@app.after_request
def after_request(response):
    """Add permissive CORS headers to the outgoing response and return it."""
    cors_headers = (
        ('Access-Control-Allow-Origin', '*'),
        ('Access-Control-Allow-Headers', 'Content-Type,Authorization'),
        ('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS'),
    )
    for header_name, header_value in cors_headers:
        response.headers.add(header_name, header_value)
    return response

def find_modal_command():
    """Return the argv prefix used to invoke the Modal CLI.

    Preference order: the ``modal`` executable inside the ``venv`` directory
    next to this file, then a ``modal`` found on ``PATH``, and finally
    ``-m modal`` run with the current interpreter.
    """
    from shutil import which

    local_modal = Path(__file__).parent / "venv" / "bin" / "modal"
    if local_modal.exists():
        return [str(local_modal)]

    if which("modal"):
        return ["modal"]

    return [sys.executable, "-m", "modal"]

# Markers tried, in this order, to locate the start of the answer in the
# Modal command's stdout.
_ANSWER_START_MARKERS = ("📝 Answer:", "Answer:", "To decide", "Based on")
# Labels stripped from the very beginning of the extracted answer.
_ANSWER_LABELS = ("📝 Answer:", "Answer:")
_SOURCES_MARKER = "📚 Sources"
# The stopwatch emoji may or may not carry the U+FE0F variation selector, and
# the spacing after it varies, so timing lines are matched with a regex.
_STOPWATCH = r"⏱\ufe0f?"

# Patterns that end the answer, in priority order (first pattern that occurs
# anywhere wins, not the earliest position).
_ANSWER_END_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        re.escape(_SOURCES_MARKER),
        r"={60}",
        _STOPWATCH + r"[ \t]*Retrieval:",
        r"\n\n⏱",
        r"\n\n📚",
        r"\n\n\n",
    )
)

# Ordered (pattern, replacement) pairs applied to the raw answer text.
_ANSWER_CLEANUPS = (
    # Markdown table separators and structure
    (re.compile(r'\|[\s\-:]+\|'), ''),
    # Table row separators (lines that are mostly pipes)
    (re.compile(r'^\|[\s\|\-:]+\|?\s*$', re.MULTILINE), ''),
    # Standalone pipe characters
    (re.compile(r'^\s*\|\s*$', re.MULTILINE), ''),
    # Pipes at the start/end of a line (content is kept)
    (re.compile(r'^\|\s*', re.MULTILINE), ''),
    (re.compile(r'\s*\|$', re.MULTILINE), ''),
    # Lines that are just "=" separators
    (re.compile(r'^=+\s*$', re.MULTILINE), ''),
    # Timing info that got mixed in
    (re.compile(_STOPWATCH + r'\s*(?:Retrieval|Generation):.*'), ''),
    # Modal app messages
    (re.compile(r'(?:Stopping app|View run at|modal\.com).*'), ''),
    # Runs of spaces, then runs of blank lines (keep at most one blank line)
    (re.compile(r' {3,}'), ' '),
    (re.compile(r'\n{3,}'), '\n\n'),
)

# A source entry starts with a one- or two-digit number followed by "." or ")".
_SOURCE_HEADER_RE = re.compile(r"(\d{1,2})[.)]")
_MAX_SOURCE_CONTENT = 400
_MAX_SOURCES = 5


def _query_http_service(rag_service_url, question):
    """POST the question to ``<rag_service_url>/query`` and normalize the reply."""
    try:
        import requests
    except ImportError:
        return {
            "success": False,
            "error": "requests library not installed. Install with: pip install requests"
        }

    try:
        response = requests.post(
            # Tolerate a configured URL that ends with "/" (avoids "//query").
            f"{rag_service_url.rstrip('/')}/query",
            json={"question": question},
            timeout=180
        )
        response.raise_for_status()
        data = response.json()

        # Convert to the same shape the Modal path returns
        return {
            "success": data.get("success", True),
            "answer": data.get("answer", ""),
            "retrieval_time": data.get("retrieval_time", 0),
            "generation_time": data.get("generation_time", 0),
            "sources": data.get("sources", []),
            "raw_output": str(data)
        }
    except Exception as e:
        return {
            "success": False,
            "error": f"Error connecting to RAG service: {str(e)}"
        }


def _extract_answer(output):
    """Cut the answer text out of the Modal command's stdout."""
    start = -1
    for marker in _ANSWER_START_MARKERS:
        start = output.find(marker)
        if start != -1:
            break

    if start == -1:
        # No answer marker at all: take everything before the sources list.
        sources_pos = output.find(_SOURCES_MARKER)
        return output[:sources_pos].strip() if sources_pos != -1 else output

    section = output[start:]
    for pattern in _ANSWER_END_PATTERNS:
        match = pattern.search(section)
        if match:
            answer = section[:match.start()]
            break
    else:
        # No clear end marker: keep non-blank lines until something that looks
        # like sources, timing, a separator or a Modal status message.
        kept = []
        for line in section.split('\n'):
            line = line.strip()
            if not line:
                continue
            if line.startswith(("📚", "⏱")) or '===' in line:
                break
            if 'Stopping app' in line or 'View run at' in line or 'modal.com' in line:
                break
            kept.append(line)
        answer = '\n'.join(kept)

    # Strip only the leading label; an "Answer:" inside the text is content.
    for label in _ANSWER_LABELS:
        if answer.startswith(label):
            answer = answer[len(label):]
            break
    return answer.strip()


def _clean_answer(answer):
    """Remove table syntax, timing/Modal noise and excess whitespace."""
    for pattern, replacement in _ANSWER_CLEANUPS:
        answer = pattern.sub(replacement, answer)
    return answer.strip()


def _extract_timing(output, label):
    """Return the seconds reported on the ``<stopwatch> <label>:`` line, or None."""
    match = re.search(
        _STOPWATCH + r"[ \t]*" + re.escape(label) + r":[ \t]*([0-9]*\.?[0-9]+)",
        output,
    )
    return float(match.group(1)) if match else None


def _looks_like_source_path(text):
    """Tell whether a line of the sources section names a document path."""
    return "/insurance-data" in text or "tokyo_auto_insurance" in text.lower()


def _finish_source(entry, sources):
    """Finalize one parsed source and append it if it carries any information."""
    if entry is None:
        return
    content = " ".join(entry.pop("parts")).strip()
    if len(content) > _MAX_SOURCE_CONTENT:
        content = content[:_MAX_SOURCE_CONTENT] + "..."
    entry["content"] = content
    if entry.get("path") or content:
        sources.append(entry)


def _extract_sources(output):
    """Parse the numbered list that follows the sources marker."""
    start = output.find(_SOURCES_MARKER)
    if start == -1:
        return []

    sources = []
    current = None  # entry being assembled; "parts" collects content lines
    for line in output[start:].split("\n")[1:]:  # [1:] skips the header line
        line = line.strip()
        if not line:
            continue

        header = _SOURCE_HEADER_RE.match(line)
        if header:
            _finish_source(current, sources)
            current = {"number": header.group(1), "parts": []}
            # The path may sit on the header line itself ("1. /insurance-data/...").
            line = line[header.end():].strip()
            if not _looks_like_source_path(line):
                continue

        if _looks_like_source_path(line):
            if current is None:
                # Path seen before any numbered header: start an unnumbered entry.
                current = {"parts": []}
            current["path"] = line.replace("/insurance-data/", "").replace("docs/", "")
        elif current is not None:
            # Skip separators and timing info
            if "===" in line or "Retrieval:" in line or "Generation:" in line:
                continue
            clean_line = line.replace("|", "").strip()
            if len(clean_line) > 10:  # only keep substantial content
                current["parts"].append(clean_line)

    _finish_source(current, sources)
    return sources


def query_rag(question: str):
    """Query the RAG system via the Nebius HTTP service or the Modal CLI.

    When the ``RAG_SERVICE_URL`` environment variable is set, the question is
    POSTed to ``<RAG_SERVICE_URL>/query``. Otherwise ``modal run`` is executed
    as a subprocess (3 minute timeout) and its stdout is parsed.

    Args:
        question: The user's question, passed through unchanged.

    Returns:
        A dict. On success: ``success`` (True), ``answer``, ``retrieval_time``,
        ``generation_time``, ``sources`` (at most 5 on the Modal path) and
        ``raw_output``. On failure: ``success`` (False) and ``error``.
        Errors from either backend are reported this way rather than raised.
    """
    rag_service_url = os.getenv("RAG_SERVICE_URL")
    if rag_service_url:
        # Use HTTP API (Nebius deployment)
        return _query_http_service(rag_service_url, question)

    # Fall back to Modal (original deployment)
    modal_cmd = find_modal_command()

    try:
        result = subprocess.run(
            modal_cmd + [
                "run",
                "src/rag/modal-rag-product-design.py::query_product_design",
                "--question", question
            ],
            capture_output=True,
            text=True,
            timeout=180,  # 3 minutes for cold start
            check=True
        )
        output = result.stdout

        return {
            "success": True,
            "answer": _clean_answer(_extract_answer(output)),
            "retrieval_time": _extract_timing(output, "Retrieval"),
            "generation_time": _extract_timing(output, "Generation"),
            "sources": _extract_sources(output)[:_MAX_SOURCES],
            "raw_output": output
        }

    except subprocess.TimeoutExpired:
        return {
            "success": False,
            "error": "Query timed out after 3 minutes. This might be due to Modal cold start (first query takes 10-15 seconds). Please try again - subsequent queries should be faster. If the problem persists, try a simpler question."
        }
    except subprocess.CalledProcessError as e:
        return {
            "success": False,
            "error": f"Error executing query: {e.stderr or str(e)}"
        }
    except Exception as e:
        # Boundary for the web handler: report instead of propagating.
        return {
            "success": False,
            "error": f"Unexpected error: {str(e)}"
        }

@app.route('/')
def index():
    """Serve the ``index.html`` template; on failure return the error text with status 500."""
    try:
        page = render_template('index.html')
    except Exception as exc:
        return f"Error rendering template: {str(exc)}", 500
    return page

@app.route('/api/query', methods=['POST'])
def api_query():
    """API endpoint for queries.

    Expects a JSON object body with a non-empty string ``question``.
    Returns the ``query_rag`` result as JSON, or a 400 JSON error when the
    body is missing, is not a JSON object, or carries no usable question.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so every bad request gets the same JSON 400 below.
    payload = request.get_json(silent=True)
    question = payload.get('question') if isinstance(payload, dict) else None

    if not isinstance(question, str) or not question.strip():
        return jsonify({
            "success": False,
            "error": "Please provide a question"
        }), 400

    result = query_rag(question.strip())
    return jsonify(result)

@app.route('/health')
def health():
    """Report liveness as the JSON object ``{"status": "ok"}``."""
    status_payload = {"status": "ok"}
    return jsonify(status_payload)

if __name__ == '__main__':
    # Modal is only used when no HTTP RAG service is configured (see
    # query_rag), so only verify the CLI in that case.
    if not os.getenv("RAG_SERVICE_URL"):
        modal_cmd = find_modal_command()
        try:
            test_result = subprocess.run(
                modal_cmd + ["--version"],
                capture_output=True,
                timeout=5
            )
            if test_result.returncode != 0:
                print("⚠️  Warning: Modal command may not be working correctly")
        except (OSError, subprocess.SubprocessError):
            # Missing executable, permission problem or timeout: warn, don't abort.
            print("⚠️  Warning: Could not verify Modal installation")

    # Pick the first port in 5000-5009 that nothing is listening on; if all
    # of them are taken, fall back to 5000 and let Flask report the conflict.
    port = 5000
    for candidate in range(5000, 5010):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            # connect_ex returns 0 when something accepted the connection.
            in_use = probe.connect_ex(('127.0.0.1', candidate)) == 0
        if not in_use:
            port = candidate
            break

    print("\n" + "="*70)
    print("🚀 Starting Product Design RAG Web Interface")
    print("="*70)
    print("\n📋 Access the interface at:")
    print(f"   → http://127.0.0.1:{port}  ⭐ RECOMMENDED")
    print(f"   → http://localhost:{port}")
    print("\n💡 If you see 403 error, use 127.0.0.1 instead of localhost")
    print("💡 Press Ctrl+C to stop\n")

    # Run with explicit host and port.
    # Using 127.0.0.1 is more reliable than localhost on some systems.
    # NOTE(review): debug=True enables the Werkzeug debugger; keep this bound
    # to the loopback interface and do not use it for a public deployment.
    app.run(debug=True, host='127.0.0.1', port=port, threaded=True, use_reloader=False)