Spaces:
Runtime error
Runtime error
jaifar530 committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -332,188 +332,53 @@ if press_me_button:
|
|
| 332 |
max_cnn_prob_name = sorted_probabilities[0][0]
|
| 333 |
max_cnn_prob = float(sorted_probabilities[0][1])
|
| 334 |
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
if extra_trees_prediction == predicted_author:
|
| 343 |
-
st.success(f"Most likely written by: **{extra_trees_name}**", icon="✅")
|
| 344 |
-
st.success(f"2nd Most likely written by: **{ridge_name}**", icon="✅")
|
| 345 |
-
# st.warning(f"**Notice:** The input text has been magnified {amplify} times to better capture its characteristics and patterns.", icon="⚠️")
|
| 346 |
-
st.write("_" * 30)
|
| 347 |
-
# rain(
|
| 348 |
-
# emoji="π",
|
| 349 |
-
# font_size=54,
|
| 350 |
-
# falling_speed=5,
|
| 351 |
-
# animation_length="infinite",
|
| 352 |
-
# )
|
| 353 |
|
| 354 |
elif ridge_prediction == predicted_author:
|
| 355 |
-
st.success(f"Most likely written by: **{
|
| 356 |
st.success(f"2nd Most likely written by: **{extra_trees_name}**", icon="✅")
|
| 357 |
-
# st.warning(f"**Notice:** The input text has been magnified {amplify} times to better capture its characteristics and patterns.", icon="⚠️")
|
| 358 |
st.write("_" * 30)
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
# animation_length="infinite",
|
| 364 |
-
# )
|
| 365 |
-
|
| 366 |
-
elif ridge_prediction == extra_trees_prediction:
|
| 367 |
-
st.success(f"Most likely written by: **{ridge_name}**", icon="✅")
|
| 368 |
-
st.success(f"2nd Most likely written by: **{cnn_name}**", icon="✅")
|
| 369 |
-
#st.warning(f"**Notice:** The input text has been magnified {amplify} times to better capture its characteristics and patterns.", icon="⚠️")
|
| 370 |
st.write("_" * 30)
|
| 371 |
-
# rain(
|
| 372 |
-
# emoji="π",
|
| 373 |
-
# font_size=54,
|
| 374 |
-
# falling_speed=5,
|
| 375 |
-
# animation_length="infinite",
|
| 376 |
-
# )
|
| 377 |
-
else:
|
| 378 |
-
# Repeat the text with a space at the end of each iteration
|
| 379 |
|
| 380 |
-
# Load proper pre-trained for full texts
|
| 381 |
-
file_prefix = 'not_trancated_full_paragraph.xlsx'
|
| 382 |
-
with open(f"{file_prefix}_ridge_model.pkl", 'rb') as file:
|
| 383 |
-
ridge_model = pickle.load(file)
|
| 384 |
-
|
| 385 |
-
with open(f"{file_prefix}_extra_trees_model.pkl", 'rb') as file:
|
| 386 |
-
extra_trees_model = pickle.load(file)
|
| 387 |
-
|
| 388 |
-
with open(f"{file_prefix}_vectorizer.pkl", 'rb') as file:
|
| 389 |
-
vectorizer = pickle.load(file)
|
| 390 |
-
|
| 391 |
-
repeated_text = ""
|
| 392 |
-
max_word_count = 500
|
| 393 |
-
amplify = 1
|
| 394 |
-
if word_count >= max_word_count:
|
| 395 |
-
amplify = 2
|
| 396 |
else:
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
new_text = repeated_text
|
| 403 |
-
|
| 404 |
-
word_count = len(re.findall(r'\w+', new_text))
|
| 405 |
-
## Repeat ML
|
| 406 |
-
|
| 407 |
-
# Transform the input
|
| 408 |
-
user_input_transformed = vectorizer.transform([new_text])
|
| 409 |
|
| 410 |
-
|
| 411 |
-
ridge_prediction = ridge_model.predict(user_input_transformed)
|
| 412 |
-
extra_trees_prediction = extra_trees_model.predict(user_input_transformed)
|
| 413 |
-
|
| 414 |
-
### Repeat DL
|
| 415 |
-
predicted_author, author_probabilities = predict_author(new_text, loaded_model, tokenizer, label_encoder)
|
| 416 |
-
sorted_probabilities = sorted(author_probabilities.items(), key=lambda x: x[1], reverse=True)
|
| 417 |
-
|
| 418 |
-
new_max_cnn_prob_name = sorted_probabilities[0][0]
|
| 419 |
-
new_max_cnn_prob = float(sorted_probabilities[0][1])
|
| 420 |
-
|
| 421 |
-
# Get disply name
|
| 422 |
-
cnn_name, ridge_name, extra_trees_name = get_author_display_name(predicted_author, ridge_prediction, extra_trees_prediction)
|
| 423 |
-
with st.expander("2nd iteration Details..."):
|
| 424 |
-
st.write(f"Ridge: {ridge_name}")
|
| 425 |
-
st.write(f"ExtraTree: {extra_trees_name}")
|
| 426 |
-
st.write(f"CNN: {cnn_name}")
|
| 427 |
-
for author, prob in sorted_probabilities:
|
| 428 |
-
display_name = author_map.get(author, author)
|
| 429 |
-
st.write(f"{display_name}: {prob * 100:.2f}%")
|
| 430 |
-
st.progress(float(prob))
|
| 431 |
-
|
| 432 |
if ridge_prediction == extra_trees_prediction == predicted_author:
|
| 433 |
st.success(f"Most likely written by: **{ridge_name}**", icon="✅")
|
| 434 |
-
st.
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
# falling_speed=5,
|
| 440 |
-
# animation_length="infinite",
|
| 441 |
-
# )
|
| 442 |
-
elif new_max_cnn_prob_name == max_cnn_prob_name:
|
| 443 |
-
st.success(f"Most likely written by: **{cnn_name}**", icon="✅")
|
| 444 |
-
st.warning(f"**Notice:** The input text has been magnified {amplify} times to better capture its characteristics and patterns.", icon="⚠️")
|
| 445 |
st.write("_" * 30)
|
| 446 |
-
|
| 447 |
-
# emoji="π",
|
| 448 |
-
# font_size=54,
|
| 449 |
-
# falling_speed=5,
|
| 450 |
-
# animation_length="infinite",
|
| 451 |
-
# )
|
| 452 |
-
|
| 453 |
elif ridge_prediction == extra_trees_prediction:
|
| 454 |
st.success(f"Most likely written by: **{ridge_name}**", icon="✅")
|
| 455 |
st.success(f"2nd Most likely written by: **{cnn_name}**", icon="✅")
|
| 456 |
-
st.warning(f"**Notice:** The input text has been magnified {amplify} times to better capture its characteristics and patterns.", icon="⚠️")
|
| 457 |
-
st.write("_" * 30)
|
| 458 |
-
# rain(
|
| 459 |
-
# emoji="π",
|
| 460 |
-
# font_size=54,
|
| 461 |
-
# falling_speed=5,
|
| 462 |
-
# animation_length="infinite",
|
| 463 |
-
# )
|
| 464 |
-
|
| 465 |
-
elif extra_trees_prediction == predicted_author:
|
| 466 |
-
st.success(f"Most likely written by: **{extra_trees_name}**", icon="✅")
|
| 467 |
-
st.success(f"2nd Most likely written by: **{ridge_name}**", icon="✅")
|
| 468 |
-
st.warning(f"**Notice:** The input text has been magnified {amplify} times to better capture its characteristics and patterns.", icon="⚠️")
|
| 469 |
st.write("_" * 30)
|
| 470 |
-
# rain(
|
| 471 |
-
# emoji="π",
|
| 472 |
-
# font_size=54,
|
| 473 |
-
# falling_speed=5,
|
| 474 |
-
# animation_length="infinite",
|
| 475 |
-
# )
|
| 476 |
-
|
| 477 |
-
elif ridge_prediction == predicted_author:
|
| 478 |
-
st.success(f"Most likely written by: **{ridge_name}**", icon="✅")
|
| 479 |
-
st.success(f"2nd Most likely written by: **{extra_trees_name}**", icon="✅")
|
| 480 |
-
st.warning(f"**Notice:** The input text has been magnified {amplify} times to better capture its characteristics and patterns.", icon="⚠️")
|
| 481 |
-
st.write("_" * 30)
|
| 482 |
-
# rain(
|
| 483 |
-
# emoji="π",
|
| 484 |
-
# font_size=54,
|
| 485 |
-
# falling_speed=5,
|
| 486 |
-
# animation_length="infinite",
|
| 487 |
-
# )
|
| 488 |
|
| 489 |
-
|
| 490 |
else:
|
| 491 |
st.warning("Notice 1: There is a difficulity predicting your text, it might fill into one of the below:", icon="⚠️")
|
| 492 |
st.success(f"1- **{ridge_name}**", icon="✅")
|
| 493 |
st.success(f"2- **{cnn_name}**", icon="✅")
|
| 494 |
st.success(f"3- **{extra_trees_name}**", icon="✅")
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
# emoji="π",
|
| 499 |
-
# font_size=54,
|
| 500 |
-
# falling_speed=5,
|
| 501 |
-
# animation_length="infinite",
|
| 502 |
-
# )
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
# with st.expander("What is this project about?"):
|
| 506 |
-
# st.write("""
|
| 507 |
-
# This project is part of an MSc in Data Analytics at the University of Portsmouth.
|
| 508 |
-
# Developed by Jaifar Al Shizawi, it aims to identify whether a text is written by a human or a specific Large Language Model (LLM) like ChatGPT-3, ChatGPT-4, Google Bard, or HuggingChat.
|
| 509 |
-
# For inquiries, contact [up2152209@myport.ac.uk](mailto:up2152209@myport.ac.uk).
|
| 510 |
-
# Supervised by Dr. Mohamed Bader.
|
| 511 |
-
# """)
|
| 512 |
-
|
| 513 |
-
# for author, prob in sorted_probabilities:
|
| 514 |
-
# display_name = author_map.get(author, author) # Retrieve the display name, fall back to original if not found
|
| 515 |
-
# st.write(f"{display_name}: {prob * 100:.2f}%")
|
| 516 |
-
# st.progress(float(prob))
|
| 517 |
|
| 518 |
# Using expander to make FAQ sections
|
| 519 |
st.subheader("Frequently Asked Questions (FAQ)")
|
|
@@ -566,11 +431,6 @@ with st.expander("Can I use this as evidence?"):
|
|
| 566 |
""")
|
| 567 |
|
| 568 |
|
| 569 |
-
# # Creates a button named 'Press me'
|
| 570 |
-
# list_dir = st.button("list")
|
| 571 |
-
# if list_dir:
|
| 572 |
-
# st.write("Listing directory contents:")
|
| 573 |
-
# st.write(os.listdir('.'))
|
| 574 |
|
| 575 |
|
| 576 |
|
|
|
|
| 332 |
max_cnn_prob_name = sorted_probabilities[0][0]
|
| 333 |
max_cnn_prob = float(sorted_probabilities[0][1])
|
| 334 |
|
| 335 |
+
if word_count < 10 or word_count > 1081:
|
| 336 |
+
st.info("For better prediction input texts between 10 and 1081", icon="ℹ️")
|
| 337 |
+
|
| 338 |
+
elif word_count < 256:
|
| 339 |
+
if ridge_prediction == extra_trees_prediction == predicted_author:
|
| 340 |
+
st.success(f"Most likely written by: **{cnn_name}**", icon="✅")
|
| 341 |
+
st.info("We are quite confident in the accuracy of this result.", icon="ℹ️")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
|
| 343 |
elif ridge_prediction == predicted_author:
|
| 344 |
+
st.success(f"Most likely written by: **{cnn_name}**", icon="✅")
|
| 345 |
st.success(f"2nd Most likely written by: **{extra_trees_name}**", icon="✅")
|
|
|
|
| 346 |
st.write("_" * 30)
|
| 347 |
+
|
| 348 |
+
elif extra_trees_prediction == predicted_author:
|
| 349 |
+
st.success(f"Most likely written by: **{cnn_name}**", icon="✅")
|
| 350 |
+
st.success(f"2nd Most likely written by: **{ridge_name}**", icon="✅")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 351 |
st.write("_" * 30)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
else:
|
| 354 |
+
st.warning("Notice 1: There is a difficulity predicting your text, it might fill into one of the below:", icon="⚠️")
|
| 355 |
+
st.success(f"1- **{cnn_name}**", icon="✅")
|
| 356 |
+
st.success(f"2- **{ridge_name}**", icon="✅")
|
| 357 |
+
st.success(f"3- **{extra_trees_name}**", icon="✅")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
|
| 359 |
+
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
if ridge_prediction == extra_trees_prediction == predicted_author:
|
| 361 |
st.success(f"Most likely written by: **{ridge_name}**", icon="✅")
|
| 362 |
+
st.info("We are quite confident in the accuracy of this result.", icon="ℹ️")
|
| 363 |
+
|
| 364 |
+
elif ridge_prediction == predicted_author:
|
| 365 |
+
st.success(f"Most likely written by: **{ridge_name}**", icon="✅")
|
| 366 |
+
st.success(f"2nd Most likely written by: **{extra_trees_name}**", icon="✅")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
st.write("_" * 30)
|
| 368 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
elif ridge_prediction == extra_trees_prediction:
|
| 370 |
st.success(f"Most likely written by: **{ridge_name}**", icon="✅")
|
| 371 |
st.success(f"2nd Most likely written by: **{cnn_name}**", icon="✅")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
st.write("_" * 30)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
|
|
|
|
| 374 |
else:
|
| 375 |
st.warning("Notice 1: There is a difficulity predicting your text, it might fill into one of the below:", icon="⚠️")
|
| 376 |
st.success(f"1- **{ridge_name}**", icon="✅")
|
| 377 |
st.success(f"2- **{cnn_name}**", icon="✅")
|
| 378 |
st.success(f"3- **{extra_trees_name}**", icon="✅")
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 382 |
|
| 383 |
# Using expander to make FAQ sections
|
| 384 |
st.subheader("Frequently Asked Questions (FAQ)")
|
|
|
|
| 431 |
""")
|
| 432 |
|
| 433 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
|
| 435 |
|
| 436 |
|