File size: 5,454 Bytes
9bc63b4
06bd57c
9bc63b4
 
 
 
e3e3e14
9bc63b4
 
6c9529f
9bc63b4
6c9529f
 
 
 
9bc63b4
 
e3e3e14
 
 
 
 
 
 
06bd57c
 
e3e3e14
 
 
 
 
06bd57c
 
9bc63b4
e3e3e14
06bd57c
 
 
e3e3e14
06bd57c
e3e3e14
 
06bd57c
9bc63b4
06bd57c
9bc63b4
6eaab2f
 
 
 
 
 
 
 
9bc63b4
6eaab2f
9bc63b4
6eaab2f
 
 
 
 
 
 
 
 
 
 
 
 
 
9bc63b4
 
6eaab2f
9bc63b4
 
6eaab2f
 
 
9bc63b4
e3e3e14
6eaab2f
9bc63b4
6eaab2f
9bc63b4
6c9529f
e8c22e1
6c9529f
e8c22e1
 
9bc63b4
6eaab2f
9bc63b4
 
e3e3e14
 
 
6eaab2f
 
 
e3e3e14
9bc63b4
e3e3e14
 
06bd57c
 
e3e3e14
9bc63b4
e3e3e14
9bc63b4
e3e3e14
9bc63b4
e3e3e14
9bc63b4
 
e3e3e14
06bd57c
9bc63b4
6eaab2f
e3e3e14
6eaab2f
e3e3e14
 
 
 
6eaab2f
e3e3e14
6eaab2f
e3e3e14
9bc63b4
 
 
e3e3e14
 
6eaab2f
 
 
 
 
 
 
 
 
 
9bc63b4
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import gradio as gr
import requests
import os
from datetime import datetime
import pandas as pd

# Hugging Face token from environment (may be None if the variable is unset;
# requests would then be sent without valid credentials).
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

# Four different LLM models from USA and China.
# Full "org/name" ids as accepted by the HF Router chat-completions API.
MODELS = [
    "meta-llama/Llama-3.3-70B-Instruct",    # πŸ‡ΊπŸ‡Έ USA (Meta)
    "google/gemma-2-9b-it",                 # πŸ‡ΊπŸ‡Έ USA (Google)
    "Qwen/Qwen2.5-72B-Instruct",            # πŸ‡¨πŸ‡³ China (Alibaba)
    "deepseek-ai/DeepSeek-R1",              # πŸ‡¨πŸ‡³ China (DeepSeek)
]

def query_model(model_id, prompt, max_tokens=300, temperature=0.7):
    """Send a single prompt to one model via the HF Router API.

    Args:
        model_id: Full Hugging Face model id, e.g. "Qwen/Qwen2.5-72B-Instruct".
        prompt: User prompt text, sent as a single-turn chat message.
        max_tokens: Cap on the completion length, forwarded to the API.
        temperature: Sampling temperature, forwarded to the API.

    Returns:
        The model's reply text on success, or a human-readable error string
        ("Error <status>: ..." / "Exception: ...") on failure.  Callers
        always receive a str; this function never raises.
    """
    API_URL = "https://router.huggingface.co/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": model_id,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": False
    }

    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)

        if response.status_code == 200:
            result = response.json()
            return result["choices"][0]["message"]["content"]

        # Error path: prefer the structured JSON error body, but fall back
        # to the raw text when the body is not valid JSON (e.g. an HTML
        # gateway error page).  The previous one-liner
        # `response.json() if response.text else response.text` raised on
        # non-JSON bodies, which dropped into the generic handler below and
        # masked the HTTP status code.
        try:
            error_detail = response.json()
        except ValueError:
            error_detail = response.text
        return f"Error {response.status_code}: {error_detail}"

    except Exception as e:
        # Boundary handler: convert any network failure (timeout, DNS,
        # connection reset) or unexpected response shape into a string so
        # the batch loop in collect_batch_responses keeps going.
        return f"Exception: {str(e)}"

def collect_batch_responses(prompts_text, max_tokens=300, temperature=0.7):
    """Run every non-empty prompt line against every model in MODELS.

    Args:
        prompts_text: Multi-line string with one prompt per line; blank
            lines are ignored.
        max_tokens: Forwarded to query_model for each request.
        temperature: Forwarded to query_model for each request.

    Returns:
        (df, csv_filename, message):
            df: pandas DataFrame with one row per (prompt, model) pair.
            csv_filename: path of the CSV the results were written to,
                or None when no prompts were given.
            message: human-readable status string for the UI.
    """
    # Split prompts by newline and filter empty lines.
    prompts = [p.strip() for p in prompts_text.split('\n') if p.strip()]

    if not prompts:
        return pd.DataFrame(), None, "⚠️ No prompts provided"

    results = []

    # Each prompt is processed independently — a fresh single-turn request
    # per model, with no conversation history carried between prompts.
    for prompt_idx, prompt_text in enumerate(prompts, 1):
        for model in MODELS:
            response = query_model(model, prompt_text, max_tokens, temperature)

            results.append({
                'timestamp': datetime.now().isoformat(),
                'prompt_number': prompt_idx,
                'prompt': prompt_text,
                'model': model.split('/')[-1],  # Short model name
                'full_model': model,
                'response': response
            })

    df = pd.DataFrame(results)
    # Timestamped filename so repeated runs never overwrite each other.
    csv_filename = f"batch_responses_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    df.to_csv(csv_filename, index=False)

    completion_msg = f"βœ… Completed! Processed {len(prompts)} prompt(s) Γ— {len(MODELS)} models = {len(results)} total responses"

    return df, csv_filename, completion_msg

# Gradio interface
# Declarative layout: one textbox (multiple prompts, one per line) plus two
# sampling sliders feed collect_batch_responses; its three return values
# (DataFrame, CSV path, status string) map onto the three output components
# wired up in submit_btn.click below.
with gr.Blocks(title="Batch LLM Response Collector") as demo:
    gr.Markdown("""
    # πŸ€– Batch Multi-LLM Response Collector
    
    Compare responses from 4 different LLMs (2 US, 2 China):
    - **Llama 3.3 70B** πŸ‡ΊπŸ‡Έ - Meta's latest model (USA)
    - **Gemma 2 9B** πŸ‡ΊπŸ‡Έ - Google's efficient model (USA)
    - **Qwen 2.5 72B** πŸ‡¨πŸ‡³ - Alibaba's flagship model (China)
    - **DeepSeek R1** πŸ‡¨πŸ‡³ - DeepSeek's reasoning model (China)
    
    **Batch Processing:** Enter multiple prompts (one per line). Each prompt is processed independently with no conversation history or cross-contamination.
    """)
    
    with gr.Row():
        with gr.Column():
            # Multi-prompt input: collect_batch_responses splits on newlines.
            prompt_input = gr.Textbox(
                label="Enter your prompts (one per line)",
                placeholder="What is artificial intelligence?\nExplain quantum computing.\nDescribe machine learning.",
                lines=8
            )
            
            # Generation parameters, passed straight through to the API.
            with gr.Row():
                max_tokens = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=300,
                    step=50,
                    label="Max Response Length"
                )
                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=1.5,
                    value=0.7,
                    step=0.1,
                    label="Temperature"
                )
            
            submit_btn = gr.Button("Process Batch", variant="primary", size="lg")
    
    # Output components, in the same order as collect_batch_responses' return
    # tuple: (DataFrame, CSV file path, status message).
    status_output = gr.Textbox(label="Status", interactive=False)
    df_output = gr.Dataframe(label="Results", wrap=True)
    csv_output = gr.File(label="Download CSV")
    
    submit_btn.click(
        fn=collect_batch_responses,
        inputs=[prompt_input, max_tokens, temperature],
        outputs=[df_output, csv_output, status_output]
    )
    
    gr.Markdown("""
    ---
    ### πŸ“ About
    - Uses Hugging Face Router API
    - **Each prompt is completely independent** - no conversation history
    - Multiple prompts processed sequentially (one per line)
    - Each prompt gets fresh responses from all 4 models
    - Results include prompt_number for easy tracking
    - All results saved to timestamped CSV for analysis
    
    ### πŸ’‘ Tips
    - Separate prompts with line breaks
    - Empty lines are automatically ignored
    - Processing time scales with: (number of prompts) Γ— (number of models)
    """)

# Script entry point: launch the Gradio server when run directly.
if __name__ == "__main__":
    demo.launch()