kambris commited on
Commit
e3e3e14
·
verified ·
1 Parent(s): 22a1949

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -225
app.py CHANGED
@@ -3,281 +3,122 @@ import requests
3
  import os
4
  from datetime import datetime
5
  import pandas as pd
6
- import time
7
 
8
- # Initialize with your token
9
  HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
10
 
11
- # Use models that work with the free Serverless Inference API
12
  MODELS = [
13
- "mistralai/Mistral-7B-Instruct-v0.2",
14
- "google/flan-t5-xxl",
15
- "microsoft/DialoGPT-large",
16
- "bigscience/bloom-560m"
17
  ]
18
 
19
- def query_model(model_id, prompt, max_tokens=500, temperature=0.7):
20
- """
21
- Query a model using the direct Inference API endpoint
22
- """
23
- API_URL = f"https://router.huggingface.co/models/{model_id}"
24
- headers = {"Authorization": f"Bearer {HF_TOKEN}"}
 
25
 
26
  payload = {
27
- "inputs": prompt,
28
- "parameters": {
29
- "max_new_tokens": max_tokens,
30
- "temperature": temperature,
31
- "return_full_text": False
32
- }
33
  }
34
 
35
  try:
36
- response = requests.post(API_URL, headers=headers, json=payload)
37
-
38
- # Handle model loading (503 error)
39
- if response.status_code == 503:
40
- result = response.json()
41
- if "estimated_time" in result:
42
- wait_time = result["estimated_time"]
43
- return f"Model is loading... estimated wait: {wait_time}s. Please try again."
44
- return "Model is currently loading. Please try again in a moment."
45
 
46
  if response.status_code == 200:
47
  result = response.json()
48
-
49
- # Handle different response formats
50
- if isinstance(result, list) and len(result) > 0:
51
- if "generated_text" in result[0]:
52
- return result[0]["generated_text"]
53
- elif "translation_text" in result[0]:
54
- return result[0]["translation_text"]
55
- else:
56
- return str(result[0])
57
- elif isinstance(result, dict):
58
- if "generated_text" in result:
59
- return result["generated_text"]
60
- else:
61
- return str(result)
62
- else:
63
- return str(result)
64
  else:
65
- return f"Error {response.status_code}: {response.text}"
 
66
 
67
  except Exception as e:
68
  return f"Exception: {str(e)}"
69
 
70
- def collect_responses(prompt_text, max_tokens=500, temperature=0.7, retry_loading=True):
71
- """
72
- Collect responses from all models for a given prompt.
73
- Each model gets a fresh, independent query with no history.
74
- """
75
  results = []
76
- status_updates = []
77
 
78
  for model in MODELS:
79
- status_updates.append(f"⏳ Querying {model}...")
80
- yield "\n".join(status_updates), None, None
81
-
82
  response = query_model(model, prompt_text, max_tokens, temperature)
83
 
84
- # If model is loading and retry is enabled, wait and try again
85
- if retry_loading and "loading" in response.lower():
86
- status_updates[-1] = f"⏳ {model} is loading, waiting 20s..."
87
- yield "\n".join(status_updates), None, None
88
- time.sleep(20)
89
- response = query_model(model, prompt_text, max_tokens, temperature)
90
-
91
- result = {
92
  'timestamp': datetime.now().isoformat(),
93
  'prompt': prompt_text,
94
- 'model': model,
95
  'response': response
96
- }
97
- results.append(result)
98
-
99
- status_updates[-1] = f"✓ Completed {model}"
100
- yield "\n".join(status_updates), None, None
101
 
102
- # Create DataFrame
103
  df = pd.DataFrame(results)
104
-
105
- # Save to CSV
106
- csv_filename = f"llm_responses_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
107
  df.to_csv(csv_filename, index=False)
108
 
109
- status_updates.append(f"\n✅ All responses collected! Saved to {csv_filename}")
110
-
111
- yield "\n".join(status_updates), df, csv_filename
112
-
113
- def batch_collect_responses(prompts_text, max_tokens=500, temperature=0.7, retry_loading=True):
114
- """
115
- Collect responses for multiple prompts (one per line).
116
- Each prompt is processed independently with no conversation history.
117
- """
118
- prompts = [p.strip() for p in prompts_text.split('\n') if p.strip()]
119
-
120
- if not prompts:
121
- return "❌ Please enter at least one prompt", None, None
122
-
123
- all_results = []
124
- status_updates = []
125
-
126
- for i, prompt in enumerate(prompts, 1):
127
- status_updates.append(f"\n📝 Processing prompt {i}/{len(prompts)}: {prompt[:50]}...")
128
- yield "\n".join(status_updates), None, None
129
-
130
- for model in MODELS:
131
- status_updates.append(f" ⏳ Querying {model}...")
132
- yield "\n".join(status_updates), None, None
133
-
134
- response = query_model(model, prompt, max_tokens, temperature)
135
-
136
- # If model is loading and retry is enabled, wait and try again
137
- if retry_loading and "loading" in response.lower():
138
- status_updates[-1] = f" ⏳ {model} is loading, waiting 20s..."
139
- yield "\n".join(status_updates), None, None
140
- time.sleep(20)
141
- response = query_model(model, prompt, max_tokens, temperature)
142
-
143
- result = {
144
- 'timestamp': datetime.now().isoformat(),
145
- 'prompt': prompt,
146
- 'model': model,
147
- 'response': response
148
- }
149
- all_results.append(result)
150
-
151
- status_updates[-1] = f" ✓ Completed {model}"
152
- yield "\n".join(status_updates), None, None
153
-
154
- # Create DataFrame
155
- df = pd.DataFrame(all_results)
156
-
157
- # Save to CSV
158
- csv_filename = f"llm_responses_batch_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
159
- df.to_csv(csv_filename, index=False)
160
-
161
- status_updates.append(f"\n✅ All responses collected! Saved to {csv_filename}")
162
-
163
- yield "\n".join(status_updates), df, csv_filename
164
 
165
- # Create Gradio interface
166
- with gr.Blocks(title="Multi-LLM Response Collector") as demo:
167
  gr.Markdown("""
168
- # 🤖 Multi-LLM Response Collector (Free Tier)
169
 
170
- Collect and compare **one-shot** responses from four different LLMs:
171
- - Mistral 7B Instruct v0.2
172
- - Google Flan-T5 XXL
173
- - Microsoft DialoGPT Large
174
- - BigScience BLOOM 560M
175
 
176
- **Important:**
177
- - Each query is independent with no conversation history
178
- - Uses Hugging Face's free Serverless Inference API
179
- - Models may take 20+ seconds to load on first request
180
- - Free tier has rate limits (~100 requests/hour)
181
-
182
- Responses are saved to a CSV file for easy analysis.
183
  """)
184
 
185
- with gr.Tab("Single Prompt"):
186
- with gr.Row():
187
- with gr.Column():
188
- prompt_input = gr.Textbox(
189
- label="Enter your prompt",
190
- placeholder="e.g., What is artificial intelligence?",
191
- lines=3
192
- )
193
- max_tokens_single = gr.Slider(
194
- minimum=50,
195
- maximum=500,
196
- value=200,
197
- step=50,
198
- label="Max Tokens"
199
- )
200
- temperature_single = gr.Slider(
201
- minimum=0.0,
202
- maximum=2.0,
203
- value=0.7,
204
- step=0.1,
205
- label="Temperature (creativity)"
206
- )
207
- retry_single = gr.Checkbox(
208
- label="Auto-retry if model is loading",
209
- value=True
210
- )
211
- submit_btn = gr.Button("Collect Responses", variant="primary")
212
 
213
- status_output = gr.Textbox(label="Status", lines=6)
214
-
215
- with gr.Row():
216
- df_output = gr.Dataframe(label="Responses")
217
-
218
- csv_output = gr.File(label="Download CSV")
219
-
220
- submit_btn.click(
221
- fn=collect_responses,
222
- inputs=[prompt_input, max_tokens_single, temperature_single, retry_single],
223
- outputs=[status_output, df_output, csv_output]
224
- )
225
-
226
- with gr.Tab("Batch Prompts"):
227
- with gr.Row():
228
- with gr.Column():
229
- batch_input = gr.Textbox(
230
- label="Enter prompts (one per line)",
231
- placeholder="What is AI?\nExplain machine learning.\nWhat is deep learning?",
232
- lines=5
233
- )
234
- max_tokens_batch = gr.Slider(
235
  minimum=50,
236
  maximum=500,
237
- value=200,
238
  step=50,
239
- label="Max Tokens"
240
  )
241
- temperature_batch = gr.Slider(
242
  minimum=0.0,
243
- maximum=2.0,
244
  value=0.7,
245
  step=0.1,
246
- label="Temperature (creativity)"
247
- )
248
- retry_batch = gr.Checkbox(
249
- label="Auto-retry if model is loading",
250
- value=True
251
  )
252
- batch_btn = gr.Button("Collect Batch Responses", variant="primary")
253
-
254
- batch_status = gr.Textbox(label="Status", lines=10)
255
-
256
- with gr.Row():
257
- batch_df = gr.Dataframe(label="All Responses")
258
 
259
- batch_csv = gr.File(label="Download CSV")
260
-
261
- batch_btn.click(
262
- fn=batch_collect_responses,
263
- inputs=[batch_input, max_tokens_batch, temperature_batch, retry_batch],
264
- outputs=[batch_status, batch_df, batch_csv]
265
- )
 
 
 
266
 
267
  gr.Markdown("""
268
  ---
269
- ### 📊 CSV Format
270
- The output CSV contains:
271
- - `timestamp`: When the response was generated
272
- - `prompt`: The input prompt
273
- - `model`: Which model generated the response
274
- - `response`: The model's response
275
-
276
- ### ⚠️ Free Tier Limitations
277
- - Rate limit: ~100 requests/hour
278
- - Models may take 20+ seconds to load on first use
279
- - Some large models may not be available
280
- - For production use, consider Hugging Face Pro ($9/month)
281
  """)
282
 
283
  if __name__ == "__main__":
 
3
  import os
4
  from datetime import datetime
5
  import pandas as pd
 
6
 
7
# Read the Hugging Face API token from the environment (set as a Space secret).
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

# The four instruction-tuned models compared side by side; each id is a
# fully-qualified repo name accepted by the HF router chat-completions API.
MODELS = [
    "meta-llama/Llama-3.2-3B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "microsoft/Phi-3-mini-4k-instruct",
    "Qwen/Qwen2.5-7B-Instruct",
]
17
 
18
def query_model(model_id, prompt, max_tokens=300, temperature=0.7):
    """Send a single-turn chat prompt to one model via the HF router API.

    Args:
        model_id: Fully-qualified model repo id, e.g. "org/model".
        prompt: User message text; sent with no conversation history.
        max_tokens: Cap on the number of generated tokens.
        temperature: Sampling temperature.

    Returns:
        The generated text on success, otherwise a human-readable error
        string — this function never raises, so callers can store the
        result directly in the results table.
    """
    API_URL = "https://router.huggingface.co/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json"
    }

    # OpenAI-compatible chat-completions payload expected by the router.
    payload = {
        "model": model_id,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": False
    }

    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)

        if response.status_code == 200:
            result = response.json()
            return result["choices"][0]["message"]["content"]
        else:
            # BUG FIX: the router usually returns a JSON error body, but can
            # return plain text or HTML (e.g. from a gateway). The previous
            # unconditional response.json() raised ValueError on non-JSON
            # bodies, so the outer handler reported a generic "Exception:"
            # and lost the HTTP status code. Fall back to the raw text.
            try:
                error_detail = response.json()
            except ValueError:
                error_detail = response.text
            return f"Error {response.status_code}: {error_detail}"

    except Exception as e:
        # Covers connection errors, timeouts, and unexpected response shapes
        # (e.g. a 200 body without "choices").
        return f"Exception: {str(e)}"
46
 
47
def collect_responses(prompt_text, max_tokens=300, temperature=0.7):
    """Query every model in MODELS with the same prompt and tabulate results.

    Each model is queried independently (no shared history). Returns a
    (DataFrame, csv_path) pair; the CSV is also written to disk as a side
    effect so the UI can offer it for download.
    """
    rows = []

    for model_id in MODELS:
        # Query first, then timestamp — the timestamp marks completion time.
        answer = query_model(model_id, prompt_text, max_tokens, temperature)

        rows.append({
            'timestamp': datetime.now().isoformat(),
            'prompt': prompt_text,
            'model': model_id.rsplit('/', 1)[-1],  # Short model name
            'response': answer
        })

    frame = pd.DataFrame(rows)
    out_name = f"responses_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    frame.to_csv(out_name, index=False)

    return frame, out_name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
# Gradio interface: one prompt box, generation controls, and a results table.
with gr.Blocks(title="LLM Response Collector") as demo:
    gr.Markdown("""
    # 🤖 Multi-LLM Response Collector

    Compare responses from 4 different LLMs:
    - **Llama 3.2 3B** - Meta's instruction-tuned model
    - **Mistral 7B** - Open source conversational model
    - **Phi-3 Mini** - Microsoft's efficient model
    - **Qwen 2.5 7B** - Alibaba's multilingual model

    Each query is independent with no conversation history.
    """)

    with gr.Row():
        with gr.Column():
            # Free-text prompt shared by all four models.
            prompt_input = gr.Textbox(
                label="Enter your prompt",
                placeholder="What is artificial intelligence?",
                lines=4
            )

            # Generation controls, side by side.
            with gr.Row():
                max_tokens = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=300,
                    step=50,
                    label="Max Response Length"
                )
                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=1.5,
                    value=0.7,
                    step=0.1,
                    label="Temperature"
                )

            submit_btn = gr.Button("Get Responses", variant="primary", size="lg")

    # One row per model; CSV mirrors the table for offline analysis.
    df_output = gr.Dataframe(label="Results", wrap=True)
    csv_output = gr.File(label="Download CSV")

    submit_btn.click(
        fn=collect_responses,
        inputs=[prompt_input, max_tokens, temperature],
        outputs=[df_output, csv_output]
    )

    gr.Markdown("""
    ---
    ### 📝 About
    - Uses Hugging Face Router API
    - Each response is independent (no chat history)
    - Results saved to CSV for analysis
    """)
123
 
124
  if __name__ == "__main__":