Spaces:
Build error
Build error
fixed the csv results issue
Browse files
app.py
CHANGED
|
@@ -9,7 +9,6 @@ import numpy as np
|
|
| 9 |
from functools import lru_cache
|
| 10 |
|
| 11 |
# Cache the model loading
|
| 12 |
-
# @gr.cache
|
| 13 |
@lru_cache(maxsize=1)
|
| 14 |
def load_model():
|
| 15 |
model_path = "MMADS/MoralFoundationsClassifier"
|
|
@@ -144,34 +143,48 @@ def process_text(text):
|
|
| 144 |
|
| 145 |
return scores_text, bar_chart
|
| 146 |
|
| 147 |
-
def process_csv(file):
|
| 148 |
"""Process CSV file with multiple texts"""
|
| 149 |
if file is None:
|
| 150 |
-
return "Please upload a CSV file", None, None
|
| 151 |
|
| 152 |
try:
|
| 153 |
# Read CSV
|
| 154 |
df = pd.read_csv(file.name)
|
| 155 |
|
| 156 |
if 'text' not in df.columns:
|
| 157 |
-
return "Error: CSV must contain a 'text' column", None, None
|
| 158 |
|
| 159 |
texts = df['text'].tolist()
|
| 160 |
|
| 161 |
# Load model and process in batches
|
|
|
|
| 162 |
model, tokenizer, label_names = load_model()
|
| 163 |
|
| 164 |
# Process in batches of 32
|
| 165 |
batch_size = 32
|
| 166 |
all_results = []
|
|
|
|
| 167 |
|
| 168 |
for i in range(0, len(texts), batch_size):
|
|
|
|
|
|
|
|
|
|
| 169 |
batch_texts = texts[i:i+batch_size]
|
| 170 |
batch_results = predict_batch(batch_texts, model, tokenizer, label_names)
|
| 171 |
all_results.extend(batch_results)
|
| 172 |
|
|
|
|
|
|
|
| 173 |
# Create summary
|
| 174 |
summary = f"**Processed {len(texts)} texts**\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
# Create visualizations
|
| 177 |
bar_chart = create_visualization(all_results)
|
|
@@ -185,12 +198,14 @@ def process_csv(file):
|
|
| 185 |
} for r in all_results
|
| 186 |
])
|
| 187 |
|
| 188 |
-
|
|
|
|
|
|
|
| 189 |
|
| 190 |
-
return summary
|
| 191 |
|
| 192 |
except Exception as e:
|
| 193 |
-
return f"Error processing CSV: {str(e)}", None, None
|
| 194 |
|
| 195 |
# Create example texts
|
| 196 |
example_texts = [
|
|
@@ -244,6 +259,8 @@ with gr.Blocks(title="Moral Foundations Classifier") as demo:
|
|
| 244 |
gr.Markdown("""
|
| 245 |
Upload a CSV file with a 'text' column containing the texts to analyze.
|
| 246 |
The app will process all texts and provide aggregate visualizations.
|
|
|
|
|
|
|
| 247 |
""")
|
| 248 |
|
| 249 |
csv_input = gr.File(
|
|
@@ -257,12 +274,15 @@ with gr.Blocks(title="Moral Foundations Classifier") as demo:
|
|
| 257 |
|
| 258 |
with gr.Row():
|
| 259 |
bar_output = gr.Plot(label="Average Scores")
|
| 260 |
-
heatmap_output = gr.Plot(label="Scores Heatmap")
|
|
|
|
|
|
|
|
|
|
| 261 |
|
| 262 |
process_btn.click(
|
| 263 |
fn=process_csv,
|
| 264 |
inputs=csv_input,
|
| 265 |
-
outputs=[summary_output, bar_output, heatmap_output]
|
| 266 |
)
|
| 267 |
|
| 268 |
gr.Markdown("""
|
|
|
|
| 9 |
from functools import lru_cache
|
| 10 |
|
| 11 |
# Cache the model loading
|
|
|
|
| 12 |
@lru_cache(maxsize=1)
|
| 13 |
def load_model():
|
| 14 |
model_path = "MMADS/MoralFoundationsClassifier"
|
|
|
|
| 143 |
|
| 144 |
return scores_text, bar_chart
|
| 145 |
|
| 146 |
+
def process_csv(file, progress=gr.Progress()):
|
| 147 |
"""Process CSV file with multiple texts"""
|
| 148 |
if file is None:
|
| 149 |
+
return "Please upload a CSV file", None, None, None
|
| 150 |
|
| 151 |
try:
|
| 152 |
# Read CSV
|
| 153 |
df = pd.read_csv(file.name)
|
| 154 |
|
| 155 |
if 'text' not in df.columns:
|
| 156 |
+
return "Error: CSV must contain a 'text' column", None, None, None
|
| 157 |
|
| 158 |
texts = df['text'].tolist()
|
| 159 |
|
| 160 |
# Load model and process in batches
|
| 161 |
+
progress(0, desc="Loading model...")
|
| 162 |
model, tokenizer, label_names = load_model()
|
| 163 |
|
| 164 |
# Process in batches of 32
|
| 165 |
batch_size = 32
|
| 166 |
all_results = []
|
| 167 |
+
total_batches = (len(texts) + batch_size - 1) // batch_size
|
| 168 |
|
| 169 |
for i in range(0, len(texts), batch_size):
|
| 170 |
+
batch_num = i // batch_size + 1
|
| 171 |
+
progress(batch_num / total_batches, desc=f"Processing batch {batch_num}/{total_batches}")
|
| 172 |
+
|
| 173 |
batch_texts = texts[i:i+batch_size]
|
| 174 |
batch_results = predict_batch(batch_texts, model, tokenizer, label_names)
|
| 175 |
all_results.extend(batch_results)
|
| 176 |
|
| 177 |
+
progress(0.9, desc="Creating visualizations...")
|
| 178 |
+
|
| 179 |
# Create summary
|
| 180 |
summary = f"**Processed {len(texts)} texts**\n\n"
|
| 181 |
+
summary += "**Average Scores Across All Texts:**\n\n"
|
| 182 |
+
|
| 183 |
+
# Calculate average scores
|
| 184 |
+
avg_scores = {}
|
| 185 |
+
for label in label_names:
|
| 186 |
+
avg_scores[label] = np.mean([r['scores'][label] for r in all_results])
|
| 187 |
+
summary += f"{label.replace('_', ' ').title()}: {avg_scores[label]:.4f}\n"
|
| 188 |
|
| 189 |
# Create visualizations
|
| 190 |
bar_chart = create_visualization(all_results)
|
|
|
|
| 198 |
} for r in all_results
|
| 199 |
])
|
| 200 |
|
| 201 |
+
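# Write the per-text results to "results.csv" (a fixed path relative to the working directory) and return that path
# NOTE(review): this is not a temporary file; the same path is rewritten on every call — consider the tempfile module if requests can overlap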
|
| 202 |
+
output_path = "results.csv"
|
| 203 |
+
results_df.to_csv(output_path, index=False)
|
| 204 |
|
| 205 |
+
return summary, bar_chart, heatmap, output_path
|
| 206 |
|
| 207 |
except Exception as e:
|
| 208 |
+
return f"Error processing CSV: {str(e)}", None, None, None
|
| 209 |
|
| 210 |
# Create example texts
|
| 211 |
example_texts = [
|
|
|
|
| 259 |
gr.Markdown("""
|
| 260 |
Upload a CSV file with a 'text' column containing the texts to analyze.
|
| 261 |
The app will process all texts and provide aggregate visualizations.
|
| 262 |
+
|
| 263 |
+
A sample CSV file is available for download [here](https://huggingface.co/spaces/MMADS/MoralFoundationsClassifier-app/tree/main/examples).
|
| 264 |
""")
|
| 265 |
|
| 266 |
csv_input = gr.File(
|
|
|
|
| 274 |
|
| 275 |
with gr.Row():
|
| 276 |
bar_output = gr.Plot(label="Average Scores")
|
| 277 |
+
heatmap_output = gr.Plot(label="Scores Heatmap (First 20 texts)")
|
| 278 |
+
|
| 279 |
+
# Add download component
|
| 280 |
+
download_output = gr.File(label="Download Results", visible=True)
|
| 281 |
|
| 282 |
process_btn.click(
|
| 283 |
fn=process_csv,
|
| 284 |
inputs=csv_input,
|
| 285 |
+
outputs=[summary_output, bar_output, heatmap_output, download_output]
|
| 286 |
)
|
| 287 |
|
| 288 |
gr.Markdown("""
|