Update app.py
app.py
CHANGED
@@ -3,281 +3,122 @@ import requests
 import os
 from datetime import datetime
 import pandas as pd
-import time

-#
+# Hugging Face token from environment
 HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

-#
+# Four different LLM models to compare
 MODELS = [
-    "
-    "
-    "microsoft/
-    "
+    "meta-llama/Llama-3.2-3B-Instruct",
+    "mistralai/Mistral-7B-Instruct-v0.3",
+    "microsoft/Phi-3-mini-4k-instruct",
+    "Qwen/Qwen2.5-7B-Instruct"
 ]

-def query_model(model_id, prompt, max_tokens=
-    """
-
-
-
-
+def query_model(model_id, prompt, max_tokens=300, temperature=0.7):
+    """Send prompt to model and get response"""
+    API_URL = "https://router.huggingface.co/v1/chat/completions"
+    headers = {
+        "Authorization": f"Bearer {HF_TOKEN}",
+        "Content-Type": "application/json"
+    }

     payload = {
-        "
-        "
-
-
-
-        }
+        "model": model_id,
+        "messages": [{"role": "user", "content": prompt}],
+        "max_tokens": max_tokens,
+        "temperature": temperature,
+        "stream": False
     }

     try:
-        response = requests.post(API_URL, headers=headers, json=payload)
-
-        # Handle model loading (503 error)
-        if response.status_code == 503:
-            result = response.json()
-            if "estimated_time" in result:
-                wait_time = result["estimated_time"]
-                return f"Model is loading... estimated wait: {wait_time}s. Please try again."
-            return "Model is currently loading. Please try again in a moment."
+        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)

         if response.status_code == 200:
             result = response.json()
-
-            # Handle different response formats
-            if isinstance(result, list) and len(result) > 0:
-                if "generated_text" in result[0]:
-                    return result[0]["generated_text"]
-                elif "translation_text" in result[0]:
-                    return result[0]["translation_text"]
-                else:
-                    return str(result[0])
-            elif isinstance(result, dict):
-                if "generated_text" in result:
-                    return result["generated_text"]
-                else:
-                    return str(result)
-            else:
-                return str(result)
+            return result["choices"][0]["message"]["content"]
         else:
-
+            error_detail = response.json() if response.text else response.text
+            return f"Error {response.status_code}: {error_detail}"

     except Exception as e:
         return f"Exception: {str(e)}"

-def collect_responses(prompt_text, max_tokens=
-    """
-    Collect responses from all models for a given prompt.
-    Each model gets a fresh, independent query with no history.
-    """
+def collect_responses(prompt_text, max_tokens=300, temperature=0.7):
+    """Collect responses from all models"""
     results = []
-    status_updates = []

     for model in MODELS:
-        status_updates.append(f"⏳ Querying {model}...")
-        yield "\n".join(status_updates), None, None
-
         response = query_model(model, prompt_text, max_tokens, temperature)

-        if retry_loading and "loading" in response.lower():
-            status_updates[-1] = f"⏳ {model} is loading, waiting 20s..."
-            yield "\n".join(status_updates), None, None
-            time.sleep(20)
-            response = query_model(model, prompt_text, max_tokens, temperature)
-
-        result = {
+        results.append({
             'timestamp': datetime.now().isoformat(),
             'prompt': prompt_text,
-            'model': model,
+            'model': model.split('/')[-1],  # Short model name
             'response': response
-        }
+        })
-        results.append(result)
-
-        status_updates[-1] = f"✓ Completed {model}"
-        yield "\n".join(status_updates), None, None

-    # Create DataFrame
     df = pd.DataFrame(results)
-
-    # Save to CSV
-    csv_filename = f"llm_responses_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
+    csv_filename = f"responses_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
     df.to_csv(csv_filename, index=False)

-
-
-    yield "\n".join(status_updates), df, csv_filename
+    return df, csv_filename
-
-def batch_collect_responses(prompts_text, max_tokens=500, temperature=0.7, retry_loading=True):
-    """
-    Collect responses for multiple prompts (one per line).
-    Each prompt is processed independently with no conversation history.
-    """
-    prompts = [p.strip() for p in prompts_text.split('\n') if p.strip()]
-
-    if not prompts:
-        return "❌ Please enter at least one prompt", None, None
-
-    all_results = []
-    status_updates = []
-
-    for i, prompt in enumerate(prompts, 1):
-        status_updates.append(f"\n📝 Processing prompt {i}/{len(prompts)}: {prompt[:50]}...")
-        yield "\n".join(status_updates), None, None
-
-        for model in MODELS:
-            status_updates.append(f"  ⏳ Querying {model}...")
-            yield "\n".join(status_updates), None, None
-
-            response = query_model(model, prompt, max_tokens, temperature)
-
-            # If model is loading and retry is enabled, wait and try again
-            if retry_loading and "loading" in response.lower():
-                status_updates[-1] = f"  ⏳ {model} is loading, waiting 20s..."
-                yield "\n".join(status_updates), None, None
-                time.sleep(20)
-                response = query_model(model, prompt, max_tokens, temperature)
-
-            result = {
-                'timestamp': datetime.now().isoformat(),
-                'prompt': prompt,
-                'model': model,
-                'response': response
-            }
-            all_results.append(result)
-
-            status_updates[-1] = f"  ✓ Completed {model}"
-            yield "\n".join(status_updates), None, None
-
-    # Create DataFrame
-    df = pd.DataFrame(all_results)
-
-    # Save to CSV
-    csv_filename = f"llm_responses_batch_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
-    df.to_csv(csv_filename, index=False)
-
-    status_updates.append(f"\n✅ All responses collected! Saved to {csv_filename}")
-
-    yield "\n".join(status_updates), df, csv_filename

-#
-with gr.Blocks(title="
+# Gradio interface
+with gr.Blocks(title="LLM Response Collector") as demo:
     gr.Markdown("""
     # 🤖 Multi-LLM Response Collector

-
-    -
-    -
-    - Microsoft
-    -
+    Compare responses from 4 different LLMs:
+    - **Llama 3.2 3B** - Meta's instruction-tuned model
+    - **Mistral 7B** - Open source conversational model
+    - **Phi-3 Mini** - Microsoft's efficient model
+    - **Qwen 2.5 7B** - Alibaba's multilingual model

-
-    - Each query is independent with no conversation history
-    - Uses Hugging Face's free Serverless Inference API
-    - Models may take 20+ seconds to load on first request
-    - Free tier has rate limits (~100 requests/hour)
-
-    Responses are saved to a CSV file for easy analysis.
+    Each query is independent with no conversation history.
     """)

-    with gr.
-        with gr.
-
-
-
-
-
-            )
-            max_tokens_single = gr.Slider(
-                minimum=50,
-                maximum=500,
-                value=200,
-                step=50,
-                label="Max Tokens"
-            )
-            temperature_single = gr.Slider(
-                minimum=0.0,
-                maximum=2.0,
-                value=0.7,
-                step=0.1,
-                label="Temperature (creativity)"
-            )
-            retry_single = gr.Checkbox(
-                label="Auto-retry if model is loading",
-                value=True
-            )
-            submit_btn = gr.Button("Collect Responses", variant="primary")

-
-
-        with gr.Row():
-            df_output = gr.Dataframe(label="Responses")
-
-            csv_output = gr.File(label="Download CSV")
-
-        submit_btn.click(
-            fn=collect_responses,
-            inputs=[prompt_input, max_tokens_single, temperature_single, retry_single],
-            outputs=[status_output, df_output, csv_output]
-        )
-
-    with gr.Tab("Batch Prompts"):
-        with gr.Row():
-            with gr.Column():
-                batch_input = gr.Textbox(
-                    label="Enter prompts (one per line)",
-                    placeholder="What is AI?\nExplain machine learning.\nWhat is deep learning?",
-                    lines=5
-                )
-                max_tokens_batch = gr.Slider(
-                    minimum=50,
-                    maximum=500,
-                    value=
-                    step=50,
-                    label="Max
-                )
-
-                    minimum=0.0,
-                    maximum=
-                    value=0.7,
-                    step=0.1,
-                    label="Temperature
-                )
-                retry_batch = gr.Checkbox(
-                    label="Auto-retry if model is loading",
-                    value=True
-                )
-                batch_btn = gr.Button("Collect Batch Responses", variant="primary")
-
-        batch_status = gr.Textbox(label="Status", lines=10)
-
-        with gr.Row():
-            batch_df = gr.Dataframe(label="All Responses")

-
-
-
-
-
-
-
+    with gr.Row():
+        with gr.Column():
+            prompt_input = gr.Textbox(
+                label="Enter your prompt",
+                placeholder="What is artificial intelligence?",
+                lines=4
+            )
+
+            with gr.Row():
+                max_tokens = gr.Slider(
+                    minimum=50,
+                    maximum=500,
+                    value=300,
+                    step=50,
+                    label="Max Response Length"
+                )
+                temperature = gr.Slider(
+                    minimum=0.0,
+                    maximum=1.5,
+                    value=0.7,
+                    step=0.1,
+                    label="Temperature"
+                )
+
+            submit_btn = gr.Button("Get Responses", variant="primary", size="lg")
+
+    df_output = gr.Dataframe(label="Results", wrap=True)
+    csv_output = gr.File(label="Download CSV")
+
+    submit_btn.click(
+        fn=collect_responses,
+        inputs=[prompt_input, max_tokens, temperature],
+        outputs=[df_output, csv_output]
+    )

     gr.Markdown("""
     ---
-    ###
-
-    -
-    -
-    - `model`: Which model generated the response
-    - `response`: The model's response
-
-    ### ⚠️ Free Tier Limitations
-    - Rate limit: ~100 requests/hour
-    - Models may take 20+ seconds to load on first use
-    - Some large models may not be available
-    - For production use, consider Hugging Face Pro ($9/month)
+    ### 📝 About
+    - Uses Hugging Face Router API
+    - Each response is independent (no chat history)
+    - Results saved to CSV for analysis
     """)

 if __name__ == "__main__":
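The new query_model parses the success case with a single expression, result["choices"][0]["message"]["content"]. That works because the Router endpoint speaks the OpenAI-compatible chat-completions format. A minimal sketch of the assumed response shape (the sample values are illustrative, not taken from this commit):

# Assumed shape of a 200 response from
# https://router.huggingface.co/v1/chat/completions (OpenAI-compatible format).
sample = {
    "choices": [
        {"message": {"role": "assistant", "content": "Artificial intelligence is ..."}}
    ]
}
text = sample["choices"][0]["message"]["content"]  # what the new code extracts
print(text)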
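A quick way to sanity-check the token and endpoint before launching the app is to replay the same request outside Gradio. This smoke test is a sketch, not part of the commit; it reuses only the URL, payload format, and model list shown in the diff above:

import os
import requests

API_URL = "https://router.huggingface.co/v1/chat/completions"
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

# One cheap request per model to confirm the token has access to each.
for model_id in [
    "meta-llama/Llama-3.2-3B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "microsoft/Phi-3-mini-4k-instruct",
    "Qwen/Qwen2.5-7B-Instruct",
]:
    resp = requests.post(
        API_URL,
        headers={"Authorization": f"Bearer {HF_TOKEN}"},
        json={
            "model": model_id,
            "messages": [{"role": "user", "content": "Reply with one word."}],
            "max_tokens": 10,
        },
        timeout=60,
    )
    print(model_id, "->", resp.status_code)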