import gradio as gr import requests import os from datetime import datetime import pandas as pd # Hugging Face token from environment HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN") # Four different LLM models from USA and China MODELS = [ "meta-llama/Llama-3.3-70B-Instruct", # πŸ‡ΊπŸ‡Έ USA (Meta) "google/gemma-2-9b-it", # πŸ‡ΊπŸ‡Έ USA (Google) "Qwen/Qwen2.5-72B-Instruct", # πŸ‡¨πŸ‡³ China (Alibaba) "deepseek-ai/DeepSeek-R1", # πŸ‡¨πŸ‡³ China (DeepSeek) ] def query_model(model_id, prompt, max_tokens=300, temperature=0.7): """Send prompt to model and get response""" API_URL = "https://router.huggingface.co/v1/chat/completions" headers = { "Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json" } payload = { "model": model_id, "messages": [{"role": "user", "content": prompt}], "max_tokens": max_tokens, "temperature": temperature, "stream": False } try: response = requests.post(API_URL, headers=headers, json=payload, timeout=60) if response.status_code == 200: result = response.json() return result["choices"][0]["message"]["content"] else: error_detail = response.json() if response.text else response.text return f"Error {response.status_code}: {error_detail}" except Exception as e: return f"Exception: {str(e)}" def collect_batch_responses(prompts_text, max_tokens=300, temperature=0.7): """Collect responses from all models for multiple prompts""" # Split prompts by newline and filter empty lines prompts = [p.strip() for p in prompts_text.split('\n') if p.strip()] if not prompts: return pd.DataFrame(), None, "⚠️ No prompts provided" results = [] status_msg = f"Processing {len(prompts)} prompt(s) across {len(MODELS)} model(s)..." # Process each prompt independently for prompt_idx, prompt_text in enumerate(prompts, 1): # Each prompt gets fresh responses from all models for model in MODELS: response = query_model(model, prompt_text, max_tokens, temperature) results.append({ 'timestamp': datetime.now().isoformat(), 'prompt_number': prompt_idx, 'prompt': prompt_text, 'model': model.split('/')[-1], # Short model name 'full_model': model, 'response': response }) df = pd.DataFrame(results) csv_filename = f"batch_responses_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" df.to_csv(csv_filename, index=False) completion_msg = f"βœ… Completed! Processed {len(prompts)} prompt(s) Γ— {len(MODELS)} models = {len(results)} total responses" return df, csv_filename, completion_msg # Gradio interface with gr.Blocks(title="Batch LLM Response Collector") as demo: gr.Markdown(""" # πŸ€– Batch Multi-LLM Response Collector Compare responses from 4 different LLMs (2 US, 2 China): - **Llama 3.3 70B** πŸ‡ΊπŸ‡Έ - Meta's latest model (USA) - **Gemma 2 9B** πŸ‡ΊπŸ‡Έ - Google's efficient model (USA) - **Qwen 2.5 72B** πŸ‡¨πŸ‡³ - Alibaba's flagship model (China) - **DeepSeek R1** πŸ‡¨πŸ‡³ - DeepSeek's reasoning model (China) **Batch Processing:** Enter multiple prompts (one per line). Each prompt is processed independently with no conversation history or cross-contamination. """) with gr.Row(): with gr.Column(): prompt_input = gr.Textbox( label="Enter your prompts (one per line)", placeholder="What is artificial intelligence?\nExplain quantum computing.\nDescribe machine learning.", lines=8 ) with gr.Row(): max_tokens = gr.Slider( minimum=50, maximum=500, value=300, step=50, label="Max Response Length" ) temperature = gr.Slider( minimum=0.0, maximum=1.5, value=0.7, step=0.1, label="Temperature" ) submit_btn = gr.Button("Process Batch", variant="primary", size="lg") status_output = gr.Textbox(label="Status", interactive=False) df_output = gr.Dataframe(label="Results", wrap=True) csv_output = gr.File(label="Download CSV") submit_btn.click( fn=collect_batch_responses, inputs=[prompt_input, max_tokens, temperature], outputs=[df_output, csv_output, status_output] ) gr.Markdown(""" --- ### πŸ“ About - Uses Hugging Face Router API - **Each prompt is completely independent** - no conversation history - Multiple prompts processed sequentially (one per line) - Each prompt gets fresh responses from all 4 models - Results include prompt_number for easy tracking - All results saved to timestamped CSV for analysis ### πŸ’‘ Tips - Separate prompts with line breaks - Empty lines are automatically ignored - Processing time scales with: (number of prompts) Γ— (number of models) """) if __name__ == "__main__": demo.launch()