jsakshi committed on
Commit 9f8d6cb · verified · 1 Parent(s): c59bcee

Update app.py

Files changed (1)
  1. app.py +15 -185
app.py CHANGED
@@ -1,191 +1,21 @@
- '''import requests
- import json
- from datasets import load_dataset
- import numpy as np
- from tqdm import tqdm
- import time
- import os
-
- # Set your Hugging Face API token (set it as an environment variable)
- HF_API_TOKEN = os.environ.get("HF_TOKEN", "")
-
- # Model and task configuration
- MODELS = {
-     "describeai-gemini": "describeai/gemini",
-     "deepseek-32b": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
-
- }
- TASK = "rte"  # SuperGLUE task: Recognizing Textual Entailment
-
- # Load SuperGLUE dataset
- print("Loading dataset...")
- dataset = load_dataset("super_glue", TASK, trust_remote_code=True)
- print(f"Dataset loaded: {len(dataset['validation'])} validation examples")
-
- def query_hf_api(model_id, inputs, api_token):
-     """Query the Hugging Face Inference API."""
-     API_URL = f"https://api-inference.huggingface.co/models/{model_id}"
-     headers = {"Authorization": f"Bearer {api_token}"}
-
-     payload = {
-         "inputs": inputs,
-         "options": {"wait_for_model": True}
-     }
-
-     max_retries = 5
-     for attempt in range(max_retries):
-         response = requests.post(API_URL, headers=headers, json=payload)
-         if response.status_code == 200:
-             return response.json()
-         elif response.status_code == 429:  # Too Many Requests
-             wait_time = 2 ** attempt
-             print(f"Rate limited. Waiting {wait_time} seconds...")
-             time.sleep(wait_time)
-         else:
-             print(f"Error: {response.status_code}, {response.text}")
-             break
-
-     return None
-
- def evaluate_model_with_api(model_name, model_path, dataset, api_token):
-     """Evaluate model using the Hugging Face Inference API."""
-     print(f"\nEvaluating {model_name} on {TASK} using Inference API...")
-
-     predictions = []
-     labels = []
-
-     eval_subset = dataset["validation"]
-     max_samples = min(10, len(eval_subset))  # Limit to 10 samples for API efficiency
-
-     for i in tqdm(range(max_samples), desc=f"Evaluating {model_name}"):
-         example = eval_subset[i]
-         input_text = f"Premise: {example['premise']}\nHypothesis: {example['hypothesis']}"
-
-         result = query_hf_api(model_path, input_text, api_token)
-
-         # Ensure pred is always assigned
-         pred = 0  # Default to 0 in case of an unexpected response
-
-         if result:
-             try:
-                 if isinstance(result, list) and len(result) > 0 and isinstance(result[0], dict):
-                     if "label" in result[0]:
-                         pred = 1 if result[0]["label"].lower() in ["entailment", "1", "true"] else 0
-                     elif "score" in result[0]:  # Handling a different API format
-                         scores = [item["score"] for item in result]
-                         pred = 0 if scores[0] > scores[1] else 1
-                 else:
-                     pred = 1 if "entailment" in str(result).lower() else 0
-             except Exception as e:
-                 print(f"Error parsing result: {e}, {result}")
-
-         predictions.append(pred)
-         labels.append(example["label"])
-
-         time.sleep(0.5)
-
-     correct = sum(1 for p, l in zip(predictions, labels) if p == l)
-     accuracy = correct / len(predictions) if predictions else 0
-
-     results = {
-         "eval_accuracy": accuracy,
-         "num_samples": len(predictions)
-     }
-
-     print(f"Results for {model_name}: Accuracy = {accuracy:.4f}")
-     return results
-
-
- # Ensure API token is set
- if not HF_API_TOKEN:
-     print("Error: HF_API_TOKEN not set. Please set your Hugging Face API token.")
-     exit(1)
-
- results = {}
- for model_name, model_path in MODELS.items():
-     results[model_name] = evaluate_model_with_api(model_name, model_path, dataset, HF_API_TOKEN)
-
- # Compare results
- print("\nComparison of Results:")
- for model_name, eval_results in results.items():
-     print(f"{model_name}: {eval_results['eval_accuracy']:.4f} accuracy on {TASK}")
-
- # Save results
- with open("deepseek_vs_tinyllama_rte_results.json", "w") as f:
-     json.dump(results, f, indent=4)'''
-
  import torch
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
- from datasets import load_dataset
- import numpy as np
- from tqdm import tqdm
-
- # Define models
- MODELS = {
-     "describeai-gemini": "describeai/gemini",
-     "deepseek-32b": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
- }
-
- TASK = "rte"  # Recognizing Textual Entailment (RTE) task
- device = "cuda" if torch.cuda.is_available() else "cpu"  # Use GPU if available
-
- # Load dataset
- print("Loading dataset...")
- dataset = load_dataset("super_glue", TASK, trust_remote_code=True)
- print(f"Dataset loaded: {len(dataset['validation'])} validation examples")
-
- def load_model_and_tokenizer(model_name):
-     """Loads model and tokenizer."""
-     tokenizer = AutoTokenizer.from_pretrained(model_name)
-     model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
-     model.eval()  # Set model to evaluation mode
-     return model, tokenizer
-
- def predict(model, tokenizer, input_texts):
-     """Runs inference on input texts and returns predictions."""
-     inputs = tokenizer(input_texts, padding=True, truncation=True, return_tensors="pt").to(device)
-     with torch.no_grad():
-         outputs = model(**inputs)
-     logits = outputs.logits
-     preds = torch.argmax(logits, dim=1).cpu().numpy()  # Convert logits to class predictions
-     return preds
-
- def evaluate_model(model_name, model_path, dataset):
-     """Evaluates a model on the RTE dataset."""
-     print(f"\nEvaluating {model_name} on {TASK}...")
-
-     model, tokenizer = load_model_and_tokenizer(model_path)
-
-     predictions = []
-     labels = []
-
-     eval_subset = dataset["validation"]
-     max_samples = min(5, len(eval_subset))  # Limit to 5 samples for efficiency
-
-     for i in tqdm(range(max_samples), desc=f"Evaluating {model_name}"):
-         example = eval_subset[i]
-         input_text = f"Premise: {example['premise']}\nHypothesis: {example['hypothesis']}"
-
-         pred = predict(model, tokenizer, [input_text])[0]  # Get single prediction
-         predictions.append(pred)
-         labels.append(example["label"])
-
-     accuracy = float(np.mean(np.array(predictions) == np.array(labels)))  # Cast to float so json.dump can serialize it
-     print(f"Results for {model_name}: Accuracy = {accuracy:.4f}")
-
-     return {"eval_accuracy": accuracy, "num_samples": len(predictions)}
-
-
- # Run evaluation
- results = {}
- for model_name, model_path in MODELS.items():
-     results[model_name] = evaluate_model(model_name, model_path, dataset)
-
- # Save results
- import json
- with open("direct_model_rte_results.json", "w") as f:
-     json.dump(results, f, indent=4)
-
- print("\nFinal Results:")
- for model_name, eval_results in results.items():
-     print(f"{model_name}: {eval_results['eval_accuracy']:.4f} accuracy on {TASK}")
+ from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch
+
+ # Load the model and tokenizer
+ model_name = "describeai/gemini"  # Replace with the actual Gemini model if available on HF
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+
+ # Define input text
+ input_text = "Explain the Python function below:\n\ndef add(a, b):\n return a + b"
+
+ # Tokenize input
+ inputs = tokenizer(input_text, return_tensors="pt")
+
+ # Generate response
+ with torch.no_grad():
+     output = model.generate(**inputs, max_length=100)
+
+ # Decode and print result
+ response = tokenizer.decode(output[0], skip_special_tokens=True)
+ print("Model Output:", response)