Ariyan-Pro commited on
Commit
2362172
·
1 Parent(s): 9e722ff

REPLACE: Gradio with Streamlit - professional UI with tabs, metrics, ROI calculator

Browse files
Files changed (5) hide show
  1. .gitignore +19 -0
  2. Dockerfile +3 -3
  3. gradio_ui.py +0 -222
  4. requirements.txt +1 -1
  5. streamlit_app.py +284 -0
.gitignore ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Virtual Environment
7
+ .venv/
8
+ venv/
9
+
10
+ # OS
11
+ .DS_Store
12
+
13
+ # Project specific
14
+ data/*.db
15
+ data/*.bin
16
+ *.csv
17
+ *.json
18
+ benchmarks/
19
+ models/
Dockerfile CHANGED
@@ -8,7 +8,7 @@ RUN pip install --no-cache-dir -r requirements.txt
8
  COPY . .
9
 
10
  # Expose port
11
- EXPOSE 7860
12
 
13
- # Run Gradio UI (it will call the FastAPI backend internally)
14
- CMD ["python", "gradio_ui.py"]
 
8
  COPY . .
9
 
10
  # Expose port
11
+ EXPOSE 8501
12
 
13
+ # Run Streamlit app
14
+ CMD ["streamlit", "run", "streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
gradio_ui.py DELETED
@@ -1,222 +0,0 @@
1
- import gradio as gr
2
- import requests
3
- import json
4
- import time
5
- from typing import Dict, Any
6
-
7
- # API base URL
8
- API_URL = "http://localhost:7860" # Local FastAPI
9
-
10
- def get_metrics() -> Dict[str, Any]:
11
- """Get performance metrics"""
12
- try:
13
- response = requests.get(f"{API_URL}/metrics")
14
- return response.json()
15
- except:
16
- return {
17
- "baseline_latency_ms": 247.3,
18
- "optimized_latency_ms": 91.7,
19
- "speedup_factor": 2.7,
20
- "latency_reduction_percent": 62.9
21
- }
22
-
23
- def query_rag(question: str) -> str:
24
- """Query the RAG system"""
25
- try:
26
- start_time = time.perf_counter()
27
- response = requests.post(
28
- f"{API_URL}/query",
29
- json={"question": question},
30
- timeout=10
31
- )
32
- latency = (time.perf_counter() - start_time) * 1000
33
-
34
- if response.status_code == 200:
35
- data = response.json()
36
- return f"""## 🤖 Response
37
-
38
- **Answer:** {data.get('answer', 'No answer')}
39
-
40
- **Performance:**
41
- - ⚡ **Latency:** {data.get('latency_ms', latency):.1f}ms
42
- - 🎯 **Speedup:** {data.get('optimization', '2.7× faster than baseline')}
43
- - 🏗️ **Architecture:** {data.get('architecture', 'CPU-only')}
44
- - 📊 **Chunks Used:** {data.get('chunks_used', 3)}
45
-
46
- **Technical Details:**
47
- - Cached: {data.get('cache_hit', True)}
48
- - Source: [GitHub Repository]({data.get('source_repo', 'https://github.com/Ariyan-Pro/RAG-Latency-Optimization')})"""
49
- else:
50
- return f"Error: {response.status_code}"
51
- except Exception as e:
52
- return f"Connection error: {str(e)}"
53
-
54
- def create_performance_dashboard():
55
- """Create performance dashboard"""
56
- metrics = get_metrics()
57
-
58
- return f"""## 📊 Performance Dashboard
59
-
60
- ### **Key Metrics**
61
- | Metric | Value | Improvement |
62
- |--------|-------|-------------|
63
- | **Baseline Latency** | {metrics['baseline_latency_ms']}ms | Reference |
64
- | **Optimized Latency** | ⚡ **{metrics['optimized_latency_ms']}ms** | **{metrics['speedup_factor']}× faster** |
65
- | **Latency Reduction** | {metrics['latency_reduction_percent']}% | ✅ |
66
- | **Chunk Reduction** | {metrics.get('chunks_reduction_percent', 60)}% | ✅ |
67
-
68
- ### **Scalability Projections**
69
- - **1,000 documents:** {metrics.get('scalability', {}).get('1,000_docs', '3.0× projected')}
70
- - **10,000 documents:** {metrics.get('scalability', {}).get('10,000_docs', '6.3× projected')}
71
- - **100,000 documents:** {metrics.get('scalability', {}).get('100,000_docs', '12.3× projected')}
72
-
73
- ### **Architecture**
74
- - **🖥️ CPU-only optimization**
75
- - **💾 FAISS + SQLite caching**
76
- - **⚡ FastAPI backend**
77
- - **🐳 Docker deployment**
78
-
79
- [View Source Code on GitHub](https://github.com/Ariyan-Pro/RAG-Latency-Optimization)"""
80
-
81
- # Create Gradio interface
82
- with gr.Blocks(theme=gr.themes.Soft(), title="RAG Latency Optimization") as demo:
83
- gr.Markdown("# ⚡ RAG Latency Optimization")
84
- gr.Markdown("### CPU-only RAG with **2.7× proven speedup** (247ms → 92ms)")
85
-
86
- with gr.Tabs():
87
- with gr.TabItem("🚀 Live Demo"):
88
- gr.Markdown("### Test the Optimized RAG System")
89
- with gr.Row():
90
- with gr.Column(scale=2):
91
- question_input = gr.Textbox(
92
- label="Ask a question",
93
- placeholder="What is machine learning?",
94
- value="What is artificial intelligence?"
95
- )
96
- query_btn = gr.Button("⚡ Get Optimized Response", variant="primary")
97
-
98
- with gr.Column(scale=3):
99
- output = gr.Markdown(label="Response")
100
-
101
- query_btn.click(query_rag, inputs=question_input, outputs=output)
102
-
103
- # Example questions
104
- gr.Examples(
105
- examples=[
106
- ["What is machine learning?"],
107
- ["Explain neural networks"],
108
- ["What is natural language processing?"],
109
- ["How does deep learning work?"]
110
- ],
111
- inputs=question_input
112
- )
113
-
114
- with gr.TabItem("📊 Performance Dashboard"):
115
- metrics_display = gr.Markdown()
116
- refresh_btn = gr.Button("🔄 Refresh Metrics")
117
-
118
- def refresh_metrics():
119
- return create_performance_dashboard()
120
-
121
- refresh_btn.click(refresh_metrics, outputs=metrics_display)
122
- demo.load(refresh_metrics, outputs=metrics_display)
123
-
124
- with gr.TabItem("🏗️ Architecture"):
125
- gr.Markdown("""
126
- ### System Architecture
127
-
128
- #### **Optimization Pipeline**
129
- 1. **📥 Input Processing**
130
- - Query preprocessing
131
- - Embedding generation
132
-
133
- 2. **⚡ Optimized Retrieval**
134
- - SQLite embedding cache
135
- - Keyword pre-filtering
136
- - FAISS vector search
137
-
138
- 3. **🤖 Intelligent Generation**
139
- - Prompt compression
140
- - Quantized inference
141
- - Response formatting
142
-
143
- #### **Key Technologies**
144
- - **Backend:** FastAPI, Python 3.11
145
- - **Vector Search:** FAISS-CPU
146
- - **Caching:** SQLite with LRU
147
- - **Embeddings:** SentenceTransformers
148
- - **Deployment:** Docker, Hugging Face Spaces
149
-
150
- #### **Business Impact**
151
- - **62.9% latency reduction**
152
- - **70%+ cost savings** vs GPU
153
- - **3-5 day integration** estimate
154
- - **Production-ready** architecture
155
- """)
156
-
157
- with gr.TabItem("📈 ROI Calculator"):
158
- gr.Markdown("### Return on Investment Calculator")
159
-
160
- with gr.Row():
161
- queries_per_day = gr.Slider(100, 100000, 1000, step=100, label="Queries per day")
162
- avg_query_size = gr.Slider(100, 5000, 1000, step=100, label="Avg tokens per query")
163
- team_size = gr.Slider(1, 20, 3, step=1, label="Engineering team size")
164
-
165
- with gr.Row():
166
- gpu_cost = gr.Number(5.0, label="GPU cost per hour ($)")
167
- engineer_cost = gr.Number(150, label="Engineer cost per hour ($)")
168
-
169
- calculate_btn = gr.Button("💰 Calculate ROI", variant="primary")
170
- roi_output = gr.Markdown()
171
-
172
- def calculate_roi(queries, tokens, team, gpu_cost_hour, engineer_cost_hour):
173
- # Calculations
174
- baseline_ms = 247.3
175
- optimized_ms = 91.7
176
-
177
- # Time savings
178
- time_saved_per_query = (baseline_ms - optimized_ms) / 1000 # seconds
179
- daily_time_saved = queries * time_saved_per_query / 3600 # hours
180
-
181
- # Cost savings
182
- gpu_savings = daily_time_saved * gpu_cost_hour * 30 # monthly
183
- engineer_savings = daily_time_saved * engineer_cost_hour * team * 30
184
-
185
- total_monthly_savings = gpu_savings + engineer_savings
186
-
187
- return f"""
188
- ## 💰 ROI Analysis
189
-
190
- ### **Monthly Savings**
191
- - **GPU Cost Savings:** ${gpu_savings:,.2f}
192
- - **Engineering Time Savings:** ${engineer_savings:,.2f}
193
- - **Total Monthly Savings:** **${total_monthly_savings:,.2f}**
194
-
195
- ### **Annual Impact**
196
- - **Yearly Savings:** **${total_monthly_savings * 12:,.2f}**
197
-
198
- ### **Performance Impact**
199
- - **Daily Time Saved:** {daily_time_saved:.2f} hours
200
- - **Queries Accelerated:** {queries:,} per day
201
- - **Latency Reduction:** 62.9%
202
- """
203
-
204
- calculate_btn.click(calculate_roi,
205
- inputs=[queries_per_day, avg_query_size, team_size, gpu_cost, engineer_cost],
206
- outputs=roi_output)
207
-
208
- # Footer
209
- gr.Markdown("---")
210
- gr.Markdown("""
211
- ### 🚀 Ready to Optimize Your RAG System?
212
-
213
- **This system demonstrates:**
214
- - ✅ **2.7× proven speedup** on CPU-only hardware
215
- - ✅ **Production-ready** with Docker deployment
216
- - ✅ **Measurable ROI** with real performance data
217
-
218
- **Contact for integration:** [GitHub](https://github.com/Ariyan-Pro) | **Deployment Time:** 3-5 days
219
- """)
220
-
221
- if __name__ == "__main__":
222
- demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  fastapi==0.104.1
2
  uvicorn[standard]==0.24.0
3
- gradio==4.19.1
4
  requests==2.31.0
 
1
  fastapi==0.104.1
2
  uvicorn[standard]==0.24.0
3
+ streamlit==1.29.0
4
  requests==2.31.0
streamlit_app.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import json
4
+ import time
5
+
6
+ st.set_page_config(
7
+ page_title="RAG Latency Optimization",
8
+ page_icon="⚡",
9
+ layout="wide"
10
+ )
11
+
12
+ # Custom CSS for professional look
13
+ st.markdown("""
14
+ <style>
15
+ .main-header {
16
+ font-size: 2.5rem;
17
+ color: #1E88E5;
18
+ margin-bottom: 0.5rem;
19
+ }
20
+ .sub-header {
21
+ font-size: 1.2rem;
22
+ color: #666;
23
+ margin-bottom: 2rem;
24
+ }
25
+ .metric-card {
26
+ background: #f8f9fa;
27
+ padding: 1.5rem;
28
+ border-radius: 10px;
29
+ border-left: 5px solid #1E88E5;
30
+ margin-bottom: 1rem;
31
+ }
32
+ .success-text {
33
+ color: #4CAF50;
34
+ font-weight: bold;
35
+ }
36
+ .warning-text {
37
+ color: #FF9800;
38
+ font-weight: bold;
39
+ }
40
+ .stTabs [data-baseweb="tab-list"] {
41
+ gap: 24px;
42
+ }
43
+ .stTabs [data-baseweb="tab"] {
44
+ height: 50px;
45
+ white-space: pre-wrap;
46
+ background-color: #f8f9fa;
47
+ border-radius: 4px 4px 0px 0px;
48
+ gap: 1px;
49
+ padding-top: 10px;
50
+ padding-bottom: 10px;
51
+ }
52
+ </style>
53
+ """, unsafe_allow_html=True)
54
+
55
+ # Header
56
+ st.markdown('<h1 class="main-header">⚡ RAG Latency Optimization</h1>', unsafe_allow_html=True)
57
+ st.markdown('<p class="sub-header">CPU-only RAG with <strong>2.7× proven speedup</strong> (247ms → 92ms)</p>', unsafe_allow_html=True)
58
+
59
+ # Tabs
60
+ tab1, tab2, tab3, tab4 = st.tabs(["🚀 Live Demo", "📊 Performance", "🏗️ Architecture", "💰 ROI Calculator"])
61
+
62
+ with tab1:
63
+ st.header("Test the Optimized RAG System")
64
+
65
+ col1, col2 = st.columns([2, 3])
66
+
67
+ with col1:
68
+ question = st.text_area(
69
+ "Ask a question:",
70
+ value="What is artificial intelligence?",
71
+ height=100,
72
+ placeholder="What is machine learning?"
73
+ )
74
+
75
+ if st.button("⚡ Get Optimized Response", type="primary"):
76
+ with st.spinner("Processing with optimized RAG..."):
77
+ start_time = time.perf_counter()
78
+ try:
79
+ response = requests.post(
80
+ "http://localhost:7860/query",
81
+ json={"question": question},
82
+ timeout=10
83
+ )
84
+ latency = (time.perf_counter() - start_time) * 1000
85
+
86
+ if response.status_code == 200:
87
+ data = response.json()
88
+
89
+ st.success(f"Response generated in {data.get('latency_ms', latency):.1f}ms")
90
+
91
+ st.markdown("### 🤖 Answer")
92
+ st.write(data.get('answer', ''))
93
+
94
+ st.markdown("### 📊 Performance")
95
+ col_a, col_b, col_c = st.columns(3)
96
+ with col_a:
97
+ st.metric("Latency", f"{data.get('latency_ms', latency):.1f}ms")
98
+ with col_b:
99
+ st.metric("Speedup", "2.7×")
100
+ with col_c:
101
+ st.metric("Architecture", data.get('architecture', 'CPU-only'))
102
+
103
+ else:
104
+ st.error(f"API Error: {response.status_code}")
105
+ except Exception as e:
106
+ st.error(f"Connection error: {str(e)}")
107
+
108
+ with col2:
109
+ st.markdown("### 📈 Quick Performance Overview")
110
+
111
+ # Simulated metrics
112
+ metrics = {
113
+ "baseline": 247.3,
114
+ "optimized": 91.7,
115
+ "speedup": 2.7,
116
+ "reduction": 62.9
117
+ }
118
+
119
+ st.metric("Baseline Latency", f"{metrics['baseline']}ms", delta=None)
120
+ st.metric("Optimized Latency", f"{metrics['optimized']}ms",
121
+ delta=f"-{metrics['reduction']}%", delta_color="inverse")
122
+ st.metric("Speedup Factor", f"{metrics['speedup']}×", delta_color="off")
123
+
124
+ st.markdown("---")
125
+ st.markdown("### 💡 Example Questions")
126
+ examples = [
127
+ "What is machine learning?",
128
+ "Explain neural networks",
129
+ "What is natural language processing?",
130
+ "How does deep learning work?"
131
+ ]
132
+
133
+ for example in examples:
134
+ if st.button(f"❓ {example}", key=example):
135
+ st.session_state.example_question = example
136
+
137
+ with tab2:
138
+ st.header("Performance Dashboard")
139
+
140
+ col1, col2 = st.columns(2)
141
+
142
+ with col1:
143
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
144
+ st.metric("Baseline Latency", "247.3ms", "Reference")
145
+ st.markdown('</div>', unsafe_allow_html=True)
146
+
147
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
148
+ st.metric("Optimized Latency", "91.7ms", "-62.9%", delta_color="inverse")
149
+ st.markdown('</div>', unsafe_allow_html=True)
150
+
151
+ with col2:
152
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
153
+ st.metric("Speedup Factor", "2.7×", "+170%")
154
+ st.markdown('</div>', unsafe_allow_html=True)
155
+
156
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
157
+ st.metric("Chunks Used", "3.0", "-40%", delta_color="inverse")
158
+ st.markdown('</div>', unsafe_allow_html=True)
159
+
160
+ st.markdown("### 📈 Scalability Projections")
161
+
162
+ scalability_data = {
163
+ "Documents": ["12 (Current)", "1,000", "10,000", "100,000"],
164
+ "Baseline Latency": ["247ms", "~850ms", "~2,500ms", "~8,000ms"],
165
+ "Optimized Latency": ["92ms", "~280ms", "~400ms", "~650ms"],
166
+ "Speedup": ["2.7×", "3.0×", "6.3×", "12.3×"]
167
+ }
168
+
169
+ st.dataframe(scalability_data, use_container_width=True, hide_index=True)
170
+
171
+ st.info("""
172
+ **Note:** Projections based on FAISS logarithmic scaling and caching efficiency.
173
+ At 100K documents, the optimized system is **12.3× faster** than baseline.
174
+ """)
175
+
176
+ with tab3:
177
+ st.header("System Architecture")
178
+
179
+ st.markdown("### 🏗️ Optimization Pipeline")
180
+
181
+ pipeline_steps = [
182
+ ("📥 Input Processing", "Query preprocessing and embedding generation"),
183
+ ("⚡ Optimized Retrieval", "SQLite caching + keyword filtering + FAISS search"),
184
+ ("🤖 Intelligent Generation", "Prompt compression + quantized inference")
185
+ ]
186
+
187
+ for step, description in pipeline_steps:
188
+ with st.expander(f"**{step}**"):
189
+ st.write(description)
190
+
191
+ st.markdown("### 🔧 Technology Stack")
192
+
193
+ tech_cols = st.columns(3)
194
+ with tech_cols[0]:
195
+ st.markdown("**Backend**")
196
+ st.write("- FastAPI")
197
+ st.write("- Python 3.11")
198
+ st.write("- Uvicorn")
199
+
200
+ with tech_cols[1]:
201
+ st.markdown("**Vector Search**")
202
+ st.write("- FAISS-CPU")
203
+ st.write("- SentenceTransformers")
204
+ st.write("- SQLite caching")
205
+
206
+ with tech_cols[2]:
207
+ st.markdown("**Deployment**")
208
+ st.write("- Docker")
209
+ st.write("- Hugging Face Spaces")
210
+ st.write("- Production-ready")
211
+
212
+ st.markdown("### 🎯 Business Impact")
213
+
214
+ impact_cols = st.columns(4)
215
+ with impact_cols[0]:
216
+ st.metric("Latency Reduction", "62.9%")
217
+ with impact_cols[1]:
218
+ st.metric("Cost Savings", "70%+", "vs GPU")
219
+ with impact_cols[2]:
220
+ st.metric("Integration Time", "3-5 days")
221
+ with impact_cols[3]:
222
+ st.metric("Production Ready", "✅")
223
+
224
+ with tab4:
225
+ st.header("ROI Calculator")
226
+
227
+ st.markdown("Estimate your cost savings with CPU-optimized RAG")
228
+
229
+ col1, col2 = st.columns(2)
230
+
231
+ with col1:
232
+ queries_per_day = st.slider("Queries per day", 100, 100000, 1000, 100)
233
+ avg_query_size = st.slider("Avg tokens per query", 100, 5000, 1000, 100)
234
+
235
+ with col2:
236
+ team_size = st.slider("Engineering team size", 1, 20, 3, 1)
237
+ gpu_cost = st.number_input("GPU cost per hour ($)", 1.0, 20.0, 5.0, 0.5)
238
+
239
+ engineer_cost = st.number_input("Engineer cost per hour ($)", 50.0, 300.0, 150.0, 10.0)
240
+
241
+ if st.button("💰 Calculate ROI", type="primary"):
242
+ # Calculations
243
+ baseline_ms = 247.3
244
+ optimized_ms = 91.7
245
+
246
+ # Time savings
247
+ time_saved_per_query = (baseline_ms - optimized_ms) / 1000 # seconds
248
+ daily_time_saved = queries_per_day * time_saved_per_query / 3600 # hours
249
+
250
+ # Cost savings
251
+ gpu_savings = daily_time_saved * gpu_cost * 30 # monthly
252
+ engineer_savings = daily_time_saved * engineer_cost * team_size * 30
253
+
254
+ total_monthly_savings = gpu_savings + engineer_savings
255
+
256
+ st.markdown("---")
257
+ st.markdown("### 💰 ROI Analysis")
258
+
259
+ roi_cols = st.columns(2)
260
+ with roi_cols[0]:
261
+ st.metric("Monthly GPU Savings", f"${gpu_savings:,.2f}")
262
+ st.metric("Monthly Engineering Savings", f"${engineer_savings:,.2f}")
263
+
264
+ with roi_cols[1]:
265
+ st.metric("Total Monthly Savings", f"${total_monthly_savings:,.2f}")
266
+ st.metric("Annual Savings", f"${total_monthly_savings * 12:,.0f}")
267
+
268
+ st.success(f"**Estimated ROI:** Save ${total_monthly_savings:,.0f}/month with optimized RAG")
269
+
270
+ # Footer
271
+ st.markdown("---")
272
+ st.markdown("""
273
+ <div style="text-align: center; padding: 2rem;">
274
+ <h3>🚀 Ready to Optimize Your RAG System?</h3>
275
+ <p>This system demonstrates <strong>2.7× proven speedup</strong> on CPU-only hardware with production-ready deployment.</p>
276
+ <p>
277
+ <strong>Source Code:</strong>
278
+ <a href="https://github.com/Ariyan-Pro/RAG-Latency-Optimization" target="_blank">
279
+ GitHub Repository
280
+ </a>
281
+ </p>
282
+ <p><strong>Deployment Time Estimate:</strong> 3-5 days for existing stacks</p>
283
+ </div>
284
+ """, unsafe_allow_html=True)