Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -326,9 +326,10 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
|
|
| 326 |
texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
|
| 327 |
all_emotions = []
|
| 328 |
|
| 329 |
-
#
|
| 330 |
embeddings = []
|
| 331 |
for i, text in enumerate(texts):
|
|
|
|
| 332 |
text_chunks = [text[i:i+512] for i in range(0, len(text), 512)]
|
| 333 |
chunk_embeddings = []
|
| 334 |
|
|
@@ -336,27 +337,20 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
|
|
| 336 |
chunk_embedding = get_embedding_for_text(chunk, bert_tokenizer, bert_model)
|
| 337 |
chunk_embeddings.append(chunk_embedding)
|
| 338 |
|
|
|
|
| 339 |
full_embedding = np.mean(chunk_embeddings, axis=0)
|
| 340 |
embeddings.append(full_embedding)
|
| 341 |
|
| 342 |
progress = (i + 1) / len(texts) * 0.4
|
| 343 |
progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
|
| 344 |
-
|
| 345 |
embeddings = np.array(embeddings)
|
| 346 |
|
| 347 |
-
# Process emotions with
|
| 348 |
for i, text in enumerate(texts):
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
for chunk in text_chunks:
|
| 353 |
-
emotion = emotion_classifier(chunk)[0]['label']
|
| 354 |
-
chunk_emotions.append(emotion)
|
| 355 |
-
|
| 356 |
-
# Use most common emotion for the full text
|
| 357 |
-
final_emotion = max(set(chunk_emotions), key=chunk_emotions.count)
|
| 358 |
-
all_emotions.append(final_emotion)
|
| 359 |
-
|
| 360 |
progress = 0.4 + ((i + 1) / len(texts) * 0.3)
|
| 361 |
progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
|
| 362 |
|
|
@@ -380,7 +374,6 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
|
|
| 380 |
|
| 381 |
return summaries, topic_model
|
| 382 |
|
| 383 |
-
|
| 384 |
try:
|
| 385 |
bert_tokenizer, bert_model, emotion_classifier = load_models()
|
| 386 |
st.success("Models loaded successfully!")
|
|
|
|
| 326 |
texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
|
| 327 |
all_emotions = []
|
| 328 |
|
| 329 |
+
# Get embeddings with proper output handling
|
| 330 |
embeddings = []
|
| 331 |
for i, text in enumerate(texts):
|
| 332 |
+
# Split text into 512-character slices (characters, not tokens; a rough proxy for the model's 512-token limit)
|
| 333 |
text_chunks = [text[i:i+512] for i in range(0, len(text), 512)]
|
| 334 |
chunk_embeddings = []
|
| 335 |
|
|
|
|
| 337 |
chunk_embedding = get_embedding_for_text(chunk, bert_tokenizer, bert_model)
|
| 338 |
chunk_embeddings.append(chunk_embedding)
|
| 339 |
|
| 340 |
+
# Combine chunk embeddings to represent the full poem
|
| 341 |
full_embedding = np.mean(chunk_embeddings, axis=0)
|
| 342 |
embeddings.append(full_embedding)
|
| 343 |
|
| 344 |
progress = (i + 1) / len(texts) * 0.4
|
| 345 |
progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
|
| 346 |
+
|
| 347 |
embeddings = np.array(embeddings)
|
| 348 |
|
| 349 |
+
# Process emotions with tuple output handling
|
| 350 |
for i, text in enumerate(texts):
|
| 351 |
+
result = emotion_classifier(text)
|
| 352 |
+
emotion = result[0] # Access first element of tuple
|
| 353 |
+
all_emotions.append(emotion)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
progress = 0.4 + ((i + 1) / len(texts) * 0.3)
|
| 355 |
progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
|
| 356 |
|
|
|
|
| 374 |
|
| 375 |
return summaries, topic_model
|
| 376 |
|
|
|
|
| 377 |
try:
|
| 378 |
bert_tokenizer, bert_model, emotion_classifier = load_models()
|
| 379 |
st.success("Models loaded successfully!")
|