import gradio as gr
import requests
import os
from datetime import datetime
import pandas as pd
# Hugging Face token from environment
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
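# Assumption: when running locally, export HUGGINGFACE_TOKEN before launching;
# on a hosted Space this would typically be configured as a repository secret.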
# Four different LLM models from the USA and China
MODELS = [
    "meta-llama/Llama-3.3-70B-Instruct",  # 🇺🇸 USA (Meta)
    "google/gemma-2-9b-it",               # 🇺🇸 USA (Google)
    "Qwen/Qwen2.5-72B-Instruct",          # 🇨🇳 China (Alibaba)
    "deepseek-ai/DeepSeek-R1",            # 🇨🇳 China (DeepSeek)
]
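# Note: these model IDs are assumed to be routable as-is; availability depends on
# which inference providers currently serve each model through the Router.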
def query_model(model_id, prompt, max_tokens=300, temperature=0.7):
    """Send prompt to model and get response"""
    API_URL = "https://router.huggingface.co/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": model_id,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": False
    }
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
        if response.status_code == 200:
            result = response.json()
            return result["choices"][0]["message"]["content"]
        else:
            # Fall back to the raw body if the error response is not valid JSON
            try:
                error_detail = response.json()
            except ValueError:
                error_detail = response.text
            return f"Error {response.status_code}: {error_detail}"
    except Exception as e:
        return f"Exception: {str(e)}"
def collect_batch_responses(prompts_text, max_tokens=300, temperature=0.7):
    """Collect responses from all models for multiple prompts"""
    # Split prompts by newline and filter out empty lines
    prompts = [p.strip() for p in prompts_text.split('\n') if p.strip()]
    if not prompts:
        return pd.DataFrame(), None, "⚠️ No prompts provided"
    results = []
    # Process each prompt independently
    for prompt_idx, prompt_text in enumerate(prompts, 1):
        # Each prompt gets fresh responses from all models
        for model in MODELS:
            response = query_model(model, prompt_text, max_tokens, temperature)
            results.append({
                'timestamp': datetime.now().isoformat(),
                'prompt_number': prompt_idx,
                'prompt': prompt_text,
                'model': model.split('/')[-1],  # Short model name
                'full_model': model,
                'response': response
            })
    df = pd.DataFrame(results)
    # Save all results to a timestamped CSV for download
    csv_filename = f"batch_responses_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    df.to_csv(csv_filename, index=False)
    completion_msg = f"✅ Completed! Processed {len(prompts)} prompt(s) × {len(MODELS)} models = {len(results)} total responses"
    return df, csv_filename, completion_msg
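# The collector can also be driven headlessly, without the Gradio UI
# (a sketch; the prompts are illustrative):
#   df, csv_path, msg = collect_batch_responses("What is AI?\nExplain quantum computing.")
#   print(msg)
#   print(df[["prompt_number", "model", "response"]])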
# Gradio interface
with gr.Blocks(title="Batch LLM Response Collector") as demo:
    gr.Markdown("""
    # 🤖 Batch Multi-LLM Response Collector

    Compare responses from 4 different LLMs (2 US, 2 China):
    - **Llama 3.3 70B** 🇺🇸 - Meta's latest model (USA)
    - **Gemma 2 9B** 🇺🇸 - Google's efficient model (USA)
    - **Qwen 2.5 72B** 🇨🇳 - Alibaba's flagship model (China)
    - **DeepSeek R1** 🇨🇳 - DeepSeek's reasoning model (China)

    **Batch Processing:** Enter multiple prompts (one per line). Each prompt is processed independently, with no conversation history or cross-contamination.
    """)
    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Enter your prompts (one per line)",
                placeholder="What is artificial intelligence?\nExplain quantum computing.\nDescribe machine learning.",
                lines=8
            )
            with gr.Row():
                max_tokens = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=300,
                    step=50,
                    label="Max Response Length"
                )
                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=1.5,
                    value=0.7,
                    step=0.1,
                    label="Temperature"
                )
            submit_btn = gr.Button("Process Batch", variant="primary", size="lg")

    status_output = gr.Textbox(label="Status", interactive=False)
    df_output = gr.Dataframe(label="Results", wrap=True)
    csv_output = gr.File(label="Download CSV")

    submit_btn.click(
        fn=collect_batch_responses,
        inputs=[prompt_input, max_tokens, temperature],
        outputs=[df_output, csv_output, status_output]
    )
    gr.Markdown("""
    ---
    ### 📝 About
    - Uses the Hugging Face Router API
    - **Each prompt is completely independent** - no conversation history
    - Multiple prompts (one per line) are processed sequentially
    - Each prompt gets fresh responses from all 4 models
    - Results include a prompt_number column for easy tracking
    - All results are saved to a timestamped CSV for analysis

    ### 💡 Tips
    - Separate prompts with line breaks
    - Empty lines are automatically ignored
    - Processing time scales with (number of prompts) × (number of models)
    """)
if __name__ == "__main__":
    demo.launch()