axelsirota committed on
Commit
5e3e5cd
·
verified ·
1 Parent(s): 7d20650

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +12 -6
  2. app.py +247 -0
  3. requirements.txt +1 -0
README.md CHANGED
@@ -1,12 +1,18 @@
1
  ---
2
- title: Rag Vs Finetuning
3
- emoji: 🚀
4
- colorFrom: pink
5
- colorTo: yellow
6
  sdk: gradio
7
- sdk_version: 6.5.1
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
1
  ---
2
+ title: RAG vs Fine-tuning
3
+ emoji: ⚖️
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 5.9.1
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
+ short_description: Compare RAG, fine-tuning, and long context approaches
12
  ---
13
 
14
+ # RAG vs Fine-tuning Comparison
15
+
16
+ Compare RAG, fine-tuning, and long context approaches for your use case. See which approach fits best based on your requirements.
17
+
18
+ Part of the **AI for Product Managers** course by Data Trainers LLC.
app.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+
4
# Markdown overview appended to the recommendation for the winning approach.
_APPROACH_OVERVIEWS = {
    "RAG": """### Why RAG?
RAG (Retrieval-Augmented Generation) retrieves relevant documents at query time and uses them to generate grounded answers.

**Pros:**
- Updates without retraining
- Provides citations
- Handles large document sets
- Moderate setup cost

**Cons:**
- Retrieval quality depends on chunking
- Additional latency for retrieval step
- Requires vector database

""",
    "Fine-tuning": """### Why Fine-tuning?
Fine-tuning retrains the model on your specific data to learn domain knowledge, terminology, and desired output format.

**Pros:**
- Lower per-query cost at scale
- Consistent style/format
- No retrieval latency

**Cons:**
- High upfront cost ($10K-$200K)
- Slow to update (requires retraining)
- Can't provide citations
- Needs 1000+ training examples

""",
    "Long Context": """### Why Long Context?
Long context simply includes all relevant documents directly in the prompt for each query.

**Pros:**
- Simplest to implement
- No infrastructure needed
- Easy to update

**Cons:**
- Limited corpus size
- High per-query cost
- Doesn't scale

""",
}

# Static comparison table returned unchanged with every recommendation.
_COMPARISON_TABLE = """## Approach Comparison

| Factor | RAG | Fine-tuning | Long Context |
|--------|-----|-------------|--------------|
| **Setup Cost** | $1K-$10K | $10K-$200K | $0 |
| **Time to Update** | Minutes | Weeks | Immediate |
| **Citations** | ✅ Yes | ❌ No | ⚠️ Manual |
| **Max Corpus** | Unlimited | N/A | ~100K tokens |
| **Per-Query Cost** | Medium | Low | High |
| **Scalability** | High | High | Low |
"""


def analyze_approach(
    update_frequency,
    corpus_size,
    need_citations,
    training_examples,
    budget,
    timeline,
    volume,
):
    """Score RAG, fine-tuning and long context and recommend the best fit.

    Each answer adds to or subtracts from a per-approach score and may record
    a short reason. The highest-scoring approach wins; ties keep the order
    RAG, Fine-tuning, Long Context.

    Args:
        update_frequency: "Daily/Weekly", "Monthly", or anything else
            (treated as rarely changing).
        corpus_size: "Small (<50 pages)", "Medium (50-500 pages)", or
            anything else (treated as large).
        need_citations: "Required", "Nice to have", or anything else
            (no effect on the scores).
        training_examples: A label starting with "1000+" or "100-1000"
            (for example "1000+ examples"); anything else is treated as
            fewer than 100 examples.
        budget: "Low (<$5K)", "Medium ($5K-$50K)", or anything else
            (no effect on the scores).
        timeline: "Urgent (<2 weeks)", "Standard (1-2 months)", or anything
            else (no effect on the scores).
        volume: "High (10K+ queries/day)" or anything else (no effect).

    Returns:
        A ``(recommendation, comparison)`` tuple of Markdown strings: the
        recommendation for these answers and a static comparison table.
    """
    scores = {"RAG": 0, "Fine-tuning": 0, "Long Context": 0}
    reasons = {"RAG": [], "Fine-tuning": [], "Long Context": []}

    # Update frequency
    if update_frequency == "Daily/Weekly":
        scores["RAG"] += 3
        reasons["RAG"].append("Frequent updates - RAG handles without retraining")
        scores["Fine-tuning"] -= 2
        reasons["Fine-tuning"].append("Would need constant retraining")
    elif update_frequency == "Monthly":
        scores["RAG"] += 2
        reasons["RAG"].append("Monthly updates manageable with RAG")
        scores["Long Context"] += 1
    else:  # Rarely
        scores["Fine-tuning"] += 2
        reasons["Fine-tuning"].append("Stable content suits fine-tuning")
        scores["Long Context"] += 1

    # Corpus size
    if corpus_size == "Small (<50 pages)":
        scores["Long Context"] += 3
        reasons["Long Context"].append("Small corpus fits in context window")
        scores["RAG"] -= 1
        reasons["RAG"].append("RAG may be overkill for small corpus")
    elif corpus_size == "Medium (50-500 pages)":
        scores["RAG"] += 2
        reasons["RAG"].append("Medium corpus ideal for RAG")
    else:  # Large
        scores["RAG"] += 3
        reasons["RAG"].append("Large corpus requires RAG")
        scores["Long Context"] -= 3
        reasons["Long Context"].append("Too large for context window")

    # Citations
    if need_citations == "Required":
        scores["RAG"] += 3
        reasons["RAG"].append("RAG provides source citations")
        scores["Fine-tuning"] -= 2
        reasons["Fine-tuning"].append("Fine-tuning can't provide citations")
    elif need_citations == "Nice to have":
        scores["RAG"] += 1

    # Training examples
    # Match on the label prefix so both the bare bucket ("1000+") and the
    # suffixed form ("1000+ examples") select the intended branch; an exact
    # comparison against the bare bucket never matches the suffixed labels.
    training_label = str(training_examples or "").strip()
    if training_label.startswith("1000+"):
        scores["Fine-tuning"] += 2
        reasons["Fine-tuning"].append("Sufficient training data available")
    elif training_label.startswith("100-1000"):
        scores["Fine-tuning"] -= 1
        reasons["Fine-tuning"].append("Limited training data")
    else:  # <100
        scores["Fine-tuning"] -= 2
        reasons["Fine-tuning"].append("Insufficient data for fine-tuning")

    # Budget
    if budget == "Low (<$5K)":
        scores["Long Context"] += 2
        reasons["Long Context"].append("Lowest setup cost")
        scores["Fine-tuning"] -= 2
        reasons["Fine-tuning"].append("Fine-tuning typically costs $10K+")
    elif budget == "Medium ($5K-$50K)":
        scores["RAG"] += 2
        reasons["RAG"].append("Good budget for RAG setup")

    # Timeline
    if timeline == "Urgent (<2 weeks)":
        scores["Long Context"] += 2
        reasons["Long Context"].append("Fastest to implement")
        scores["Fine-tuning"] -= 2
        reasons["Fine-tuning"].append("Fine-tuning takes weeks-months")
    elif timeline == "Standard (1-2 months)":
        scores["RAG"] += 1

    # Volume
    if volume == "High (10K+ queries/day)":
        scores["Fine-tuning"] += 2
        reasons["Fine-tuning"].append("Fine-tuning has lower per-query cost at scale")
        scores["Long Context"] -= 2
        reasons["Long Context"].append("Long context expensive at high volume")

    # Determine winner. sorted() is stable even with reverse=True, so tied
    # approaches keep the insertion order of `scores`.
    ranked = sorted(scores, key=scores.get, reverse=True)
    winner, runner_up = ranked[0], ranked[1]

    # Build recommendation
    parts = [
        f"## Recommendation: **{winner}**\n\n",
        _APPROACH_OVERVIEWS[winner],
        "### Key Factors for Your Use Case\n\n",
    ]

    # NOTE: a reasons list holds every note recorded for that approach,
    # including cautionary ones (e.g. "RAG may be overkill for small corpus").
    if reasons[winner]:
        parts.append(f"**In favor of {winner}:**\n")
        parts.extend(f"- {reason}\n" for reason in reasons[winner])

    parts.append(f"\n**Runner-up: {runner_up}**\n")
    # Only the first two notes are shown for the runner-up.
    parts.extend(f"- {reason}\n" for reason in reasons[runner_up][:2])

    return "".join(parts), _COMPARISON_TABLE
176
+
177
+
178
# Gradio UI: seven radio questions in two columns, a button that runs
# analyze_approach, and two Markdown panes that display its results.
with gr.Blocks(theme=gr.themes.Soft(), title="RAG vs Fine-tuning") as demo:
    # Intro banner explaining the decision the tool supports.
    intro_markdown = (
        "# RAG vs Fine-tuning Comparison\n\n"
        "**PM Decision:** Your engineering team proposes grounding AI in company knowledge. "
        "But which approach? RAG, fine-tuning, or just long context?\n\n"
        "Answer these questions to get a recommendation based on your specific requirements."
    )
    gr.Markdown(intro_markdown)

    with gr.Row():
        # Left column: questions about the content and the data available.
        with gr.Column():
            update_freq = gr.Radio(
                label="How often do your documents change?",
                choices=["Daily/Weekly", "Monthly", "Rarely"],
                value="Monthly",
            )
            corpus_size = gr.Radio(
                label="How large is your document corpus?",
                choices=["Small (<50 pages)", "Medium (50-500 pages)", "Large (500+ pages)"],
                value="Medium (50-500 pages)",
            )
            citations = gr.Radio(
                label="Do you need to cite source documents?",
                choices=["Required", "Nice to have", "Not needed"],
                value="Nice to have",
            )
            training_data = gr.Radio(
                label="How many training examples do you have?",
                choices=["<100 examples", "100-1000 examples", "1000+ examples"],
                value="100-1000 examples",
            )

        # Right column: project constraints.
        with gr.Column():
            budget = gr.Radio(
                label="What's your budget for setup?",
                choices=["Low (<$5K)", "Medium ($5K-$50K)", "High ($50K+)"],
                value="Medium ($5K-$50K)",
            )
            timeline = gr.Radio(
                label="What's your timeline?",
                choices=["Urgent (<2 weeks)", "Standard (1-2 months)", "Flexible (3+ months)"],
                value="Standard (1-2 months)",
            )
            volume = gr.Radio(
                label="Expected query volume?",
                choices=["Low (<1K queries/day)", "Medium (1K-10K)", "High (10K+ queries/day)"],
                value="Medium (1K-10K)",
            )

    analyze_btn = gr.Button("Get Recommendation", variant="primary")

    recommendation_output = gr.Markdown(label="Recommendation")
    comparison_output = gr.Markdown(label="Comparison")

    # The order here must match analyze_approach's positional parameters.
    question_inputs = [
        update_freq,
        corpus_size,
        citations,
        training_data,
        budget,
        timeline,
        volume,
    ]
    analyze_btn.click(
        fn=analyze_approach,
        inputs=question_inputs,
        outputs=[recommendation_output, comparison_output],
    )

    # Closing takeaway shown beneath the results.
    footer_markdown = (
        "---\n"
        "**PM Takeaway:** RAG is usually the safest choice - it's easier to update, provides citations, "
        "and has moderate costs. Only consider fine-tuning if you have stable content, 1000+ examples, "
        "and don't need citations.\n\n"
        "*AI for Product Managers*"
    )
    gr.Markdown(footer_markdown)

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ # No additional requirements - uses Gradio only