Update app.py
app.py
CHANGED
@@ -1,191 +1,21 @@
-
-import json
-from datasets import load_dataset
-import numpy as np
-from tqdm import tqdm
-import time
-import os
-
-# Set your Hugging Face API token (set it as an environment variable)
-HF_API_TOKEN = os.environ.get("HF_TOKEN", "")
-
-# Model and task configuration
-MODELS = {
-    "describeai-gemini": "describeai/gemini",
-    "deepseek-32b": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
-
-}
-TASK = "rte"  # SuperGLUE task: Recognizing Textual Entailment
-
-# Load SuperGLUE dataset
-print("Loading dataset...")
-dataset = load_dataset("super_glue", TASK, trust_remote_code=True)
-print(f"Dataset loaded: {len(dataset['validation'])} validation examples")
-
-def query_hf_api(model_id, inputs, api_token):
-    """Query the Hugging Face Inference API."""
-    API_URL = f"https://api-inference.huggingface.co/models/{model_id}"
-    headers = {"Authorization": f"Bearer {api_token}"}
-
-    payload = {
-        "inputs": inputs,
-        "options": {"wait_for_model": True}
-    }
-
-    max_retries = 5
-    for attempt in range(max_retries):
-        response = requests.post(API_URL, headers=headers, json=payload)
-        if response.status_code == 200:
-            return response.json()
-        elif response.status_code == 429:  # Too Many Requests
-            wait_time = 2 ** attempt
-            print(f"Rate limited. Waiting {wait_time} seconds...")
-            time.sleep(wait_time)
-        else:
-            print(f"Error: {response.status_code}, {response.text}")
-            break
-
-    return None
-
-def evaluate_model_with_api(model_name, model_path, dataset, api_token):
-    """Evaluate model using the Hugging Face Inference API."""
-    print(f"\nEvaluating {model_name} on {TASK} using Inference API...")
-
-    predictions = []
-    labels = []
-
-    eval_subset = dataset["validation"]
-    max_samples = min(10, len(eval_subset))  # Limit to 100 samples for API efficiency
-
-    for i in tqdm(range(max_samples), desc=f"Evaluating {model_name}"):
-        example = eval_subset[i]
-        input_text = f"Premise: {example['premise']}\nHypothesis: {example['hypothesis']}"
-
-        result = query_hf_api(model_path, input_text, api_token)
-
-        # Ensure pred is always assigned
-        pred = 0  # Default to 0 in case of an unexpected response
-
-        if result:
-            try:
-                if isinstance(result, list) and len(result) > 0 and isinstance(result[0], dict):
-                    if "label" in result[0]:
-                        pred = 1 if result[0]["label"].lower() in ["entailment", "1", "true"] else 0
-                    elif "score" in result[0]:  # Handling a different API format
-                        scores = [item["score"] for item in result]
-                        pred = 0 if scores[0] > scores[1] else 1
-                else:
-                    pred = 1 if "entailment" in str(result).lower() else 0
-            except Exception as e:
-                print(f"Error parsing result: {e}, {result}")
-
-        predictions.append(pred)
-        labels.append(example["label"])
-
-        time.sleep(0.5)
-
-    correct = sum(1 for p, l in zip(predictions, labels) if p == l)
-    accuracy = correct / len(predictions) if predictions else 0
-
-    results = {
-        "eval_accuracy": accuracy,
-        "num_samples": len(predictions)
-    }
-
-    print(f"Results for {model_name}: Accuracy = {accuracy:.4f}")
-    return results
-
-
-# Ensure API token is set
-if not HF_API_TOKEN:
-    print("Error: HF_API_TOKEN not set. Please set your Hugging Face API token.")
-    exit(1)
-
-results = {}
-for model_name, model_path in MODELS.items():
-    results[model_name] = evaluate_model_with_api(model_name, model_path, dataset, HF_API_TOKEN)
-
-# Compare results
-print("\nComparison of Results:")
-for model_name, eval_results in results.items():
-    print(f"{model_name}: {eval_results['eval_accuracy']:.4f} accuracy on {TASK}")
-
-# Save results
-with open("deepseek_vs_tinyllama_rte_results.json", "w") as f:
-    json.dump(results, f, indent=4)'''
-
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
-from datasets import load_dataset
-import numpy as np
-from tqdm import tqdm
-
-# Define models
-MODELS = {
-    "describeai-gemini": "describeai/gemini",
-    "deepseek-32b": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
-}
-
-TASK = "rte"  # Recognizing Textual Entailment (RTE) task
-device = "cuda" if torch.cuda.is_available() else "cpu"  # Use GPU if available
-
-# Load dataset
-print("Loading dataset...")
-dataset = load_dataset("super_glue", TASK, trust_remote_code=True)
-print(f"Dataset loaded: {len(dataset['validation'])} validation examples")
-
-def load_model_and_tokenizer(model_name):
-    """Loads model and tokenizer."""
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
-    model.eval()  # Set model to evaluation mode
-    return model, tokenizer
-
-def predict(model, tokenizer, input_texts):
-    """Runs inference on input texts and returns predictions."""
-    inputs = tokenizer(input_texts, padding=True, truncation=True, return_tensors="pt").to(device)
-    with torch.no_grad():
-        outputs = model(**inputs)
-    logits = outputs.logits
-    preds = torch.argmax(logits, dim=1).cpu().numpy()  # Convert logits to class predictions
-    return preds
-
-def evaluate_model(model_name, model_path, dataset):
-    """Evaluates a model on the RTE dataset."""
-    print(f"\nEvaluating {model_name} on {TASK}...")
-
-    model, tokenizer = load_model_and_tokenizer(model_path)
-
-    predictions = []
-    labels = []
-
-    eval_subset = dataset["validation"]
-    max_samples = min(5, len(eval_subset))  # Limit to 10 samples for efficiency
-
-    for i in tqdm(range(max_samples), desc=f"Evaluating {model_name}"):
-        example = eval_subset[i]
-        input_text = f"Premise: {example['premise']}\nHypothesis: {example['hypothesis']}"
-
-        pred = predict(model, tokenizer, [input_text])[0]  # Get single prediction
-        predictions.append(pred)
-        labels.append(example["label"])
-
-    accuracy = np.mean(np.array(predictions) == np.array(labels))
-    print(f"Results for {model_name}: Accuracy = {accuracy:.4f}")
-
-
-
-#
-
-for model_name, model_path in MODELS.items():
-    results[model_name] = evaluate_model(model_name, model_path, dataset)
-
-#
-
-with open("direct_model_rte_results.json", "w") as f:
-    json.dump(results, f, indent=4)
-
-
-
-
-
+
+# Load the model and tokenizer
+model_name = "describeai/gemini"  # Replace with the actual Gemini model if available on HF
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+
+# Define input text
+input_text = "Explain the Python function below:\n\ndef add(a, b):\n return a + b"
+
+# Tokenize input
+inputs = tokenizer(input_text, return_tensors="pt")
+
+# Generate response
+with torch.no_grad():
+    output = model.generate(**inputs, max_length=100)
+
+# Decode and print result
+response = tokenizer.decode(output[0], skip_special_tokens=True)
+print("Model Output:", response)
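
For reference, below is a minimal sketch of how the generation flow in the updated app.py could be wrapped for reuse. It is a sketch under stated assumptions, not part of the commit: the helper name explain_code is illustrative, max_new_tokens is used instead of max_length only to bound the generated continuation, and the AutoModelForSeq2SeqLM fallback exists purely in case the describeai/gemini checkpoint turns out to load as an encoder-decoder model rather than a causal LM.

import torch
from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer

model_name = "describeai/gemini"
tokenizer = AutoTokenizer.from_pretrained(model_name)
try:
    # Mirror the commit: try the causal-LM head first.
    model = AutoModelForCausalLM.from_pretrained(model_name)
except ValueError:
    # Assumption: fall back to a seq2seq head if the checkpoint is encoder-decoder.
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
model.eval()

def explain_code(snippet: str, max_new_tokens: int = 100) -> str:
    """Illustrative helper (not in the original script): explain a code snippet."""
    prompt = "Explain the Python function below:\n\n" + snippet
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(output[0], skip_special_tokens=True)

print("Model Output:", explain_code("def add(a, b):\n    return a + b"))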