|
|
import gradio as gr |
|
|
import requests |
|
|
import os |
|
|
from datetime import datetime |
|
|
import pandas as pd |
|
|
|
|
|
|
|
|
# Hugging Face API token read from the environment; None if unset (API calls
# will then fail with an auth error rather than at import time).
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
|
|
|
|
|
|
|
|
# Model repo ids queried for every prompt (two US providers, two Chinese).
MODELS = [


    "meta-llama/Llama-3.3-70B-Instruct",  # Meta (USA)


    "google/gemma-2-9b-it",  # Google (USA)


    "Qwen/Qwen2.5-72B-Instruct",  # Alibaba (China)


    "deepseek-ai/DeepSeek-R1",  # DeepSeek (China)


]
|
|
|
|
|
def query_model(model_id, prompt, max_tokens=300, temperature=0.7):
    """Send a single-turn chat prompt to one model via the HF Router API.

    Args:
        model_id: Fully-qualified model repo id, e.g.
            "meta-llama/Llama-3.3-70B-Instruct".
        prompt: User message text (sent as a one-message conversation).
        max_tokens: Cap on generated tokens.
        temperature: Sampling temperature.

    Returns:
        The assistant's reply text on success, or a human-readable
        "Error ..." / "Exception: ..." string on failure. Never raises.
    """
    API_URL = "https://router.huggingface.co/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": model_id,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": False  # we want the whole completion in one response
    }

    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)

        if response.status_code == 200:
            result = response.json()
            return result["choices"][0]["message"]["content"]

        # Error path: prefer the structured JSON error body, falling back to
        # raw text. (The original `response.json() if response.text else
        # response.text` raised on non-JSON error pages — e.g. HTML from a
        # gateway — and returned '' when the body was empty.)
        try:
            error_detail = response.json()
        except ValueError:
            error_detail = response.text
        return f"Error {response.status_code}: {error_detail}"

    except Exception as e:
        # Network failures, timeouts, malformed success payloads, etc. —
        # surfaced as a string so the batch loop keeps going.
        return f"Exception: {str(e)}"
|
|
|
|
|
def collect_batch_responses(prompts_text, max_tokens=300, temperature=0.7):
    """Query every model in MODELS with each non-empty prompt line.

    Args:
        prompts_text: Newline-separated prompts; blank lines are ignored.
        max_tokens: Per-response token cap, forwarded to query_model.
        temperature: Sampling temperature, forwarded to query_model.

    Returns:
        Tuple of (results DataFrame, CSV filename or None, status message).
    """
    prompts = [p.strip() for p in prompts_text.split('\n') if p.strip()]

    if not prompts:
        return pd.DataFrame(), None, "⚠️ No prompts provided"

    results = []

    # Each prompt is sent to every model independently — no shared history.
    for prompt_idx, prompt_text in enumerate(prompts, 1):
        for model in MODELS:
            response = query_model(model, prompt_text, max_tokens, temperature)

            results.append({
                'timestamp': datetime.now().isoformat(),
                'prompt_number': prompt_idx,
                'prompt': prompt_text,
                'model': model.split('/')[-1],  # short display name
                'full_model': model,            # full repo id for traceability
                'response': response
            })

    df = pd.DataFrame(results)
    # Timestamped filename so repeated runs never overwrite each other.
    csv_filename = f"batch_responses_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    df.to_csv(csv_filename, index=False)

    # NOTE(fix): in the original this f-string was broken across two physical
    # lines (a syntax error) with a mojibake check mark; rejoined on one line.
    completion_msg = (
        f"✅ Completed! Processed {len(prompts)} prompt(s) × {len(MODELS)} "
        f"models = {len(results)} total responses"
    )

    return df, csv_filename, completion_msg
|
|
|
|
|
|
|
|
# UI layout. NOTE(fix): the user-facing Markdown strings contained mojibake
# (UTF-8 emoji bytes decoded as ISO-8859-7, e.g. "π€" for 🤖, "Γ" for ×);
# the intended characters are restored below. Structure is unchanged.
with gr.Blocks(title="Batch LLM Response Collector") as demo:
    gr.Markdown("""
    # 🤖 Batch Multi-LLM Response Collector

    Compare responses from 4 different LLMs (2 US, 2 China):
    - **Llama 3.3 70B** 🇺🇸 - Meta's latest model (USA)
    - **Gemma 2 9B** 🇺🇸 - Google's efficient model (USA)
    - **Qwen 2.5 72B** 🇨🇳 - Alibaba's flagship model (China)
    - **DeepSeek R1** 🇨🇳 - DeepSeek's reasoning model (China)

    **Batch Processing:** Enter multiple prompts (one per line). Each prompt is processed independently with no conversation history or cross-contamination.
    """)

    with gr.Row():
        with gr.Column():
            # One prompt per line; blank lines are dropped by the handler.
            prompt_input = gr.Textbox(
                label="Enter your prompts (one per line)",
                placeholder="What is artificial intelligence?\nExplain quantum computing.\nDescribe machine learning.",
                lines=8
            )

            with gr.Row():
                max_tokens = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=300,
                    step=50,
                    label="Max Response Length"
                )
                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=1.5,
                    value=0.7,
                    step=0.1,
                    label="Temperature"
                )

            submit_btn = gr.Button("Process Batch", variant="primary", size="lg")

    # Outputs: status line, full results table, and the saved CSV download.
    status_output = gr.Textbox(label="Status", interactive=False)
    df_output = gr.Dataframe(label="Results", wrap=True)
    csv_output = gr.File(label="Download CSV")

    # Outputs map 1:1 onto collect_batch_responses' (df, csv, status) tuple.
    submit_btn.click(
        fn=collect_batch_responses,
        inputs=[prompt_input, max_tokens, temperature],
        outputs=[df_output, csv_output, status_output]
    )

    gr.Markdown("""
    ---
    ### 📊 About
    - Uses Hugging Face Router API
    - **Each prompt is completely independent** - no conversation history
    - Multiple prompts processed sequentially (one per line)
    - Each prompt gets fresh responses from all 4 models
    - Results include prompt_number for easy tracking
    - All results saved to timestamped CSV for analysis

    ### 💡 Tips
    - Separate prompts with line breaks
    - Empty lines are automatically ignored
    - Processing time scales with: (number of prompts) × (number of models)
    """)
|
|
|
|
|
if __name__ == "__main__":
    # Start the Gradio server only when run as a script (not on import).
    demo.launch()