Spaces:
Sleeping
Sleeping
Commit
·
ae3712d
1
Parent(s):
f9baad9
Made changes according to the website
Browse files
app.py
CHANGED
|
@@ -293,30 +293,29 @@ async def search(
|
|
| 293 |
|
| 294 |
results = [
|
| 295 |
{
|
|
|
|
| 296 |
"title": title,
|
| 297 |
"author": author,
|
| 298 |
"publisher": publisher,
|
| 299 |
"description": description,
|
| 300 |
-
"
|
| 301 |
}
|
| 302 |
-
for title, author, publisher, description, image in zip(
|
| 303 |
-
titles, authors, publishers, descriptions, images
|
| 304 |
)
|
| 305 |
]
|
| 306 |
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
return response
|
| 310 |
|
| 311 |
|
| 312 |
@app.post("/classify")
|
| 313 |
-
async def classify(data: dict, runtime: str = "normal"):
|
| 314 |
"""
|
| 315 |
Create classifier pipeline and return the results.
|
| 316 |
"""
|
| 317 |
-
titles = [book["title"] for book in data
|
| 318 |
-
descriptions = [book["description"] for book in data
|
| 319 |
-
publishers = [book["publisher"] for book in data
|
| 320 |
|
| 321 |
# Combine title, description, and publisher into a single string
|
| 322 |
combined_data = [
|
|
@@ -369,7 +368,9 @@ async def classify(data: dict, runtime: str = "normal"):
|
|
| 369 |
classes = [
|
| 370 |
{
|
| 371 |
"audience": classifier_pipe(doc, audience)["labels"][0],
|
| 372 |
-
"
|
|
|
|
|
|
|
| 373 |
}
|
| 374 |
for doc in combined_data
|
| 375 |
]
|
|
@@ -378,16 +379,16 @@ async def classify(data: dict, runtime: str = "normal"):
|
|
| 378 |
|
| 379 |
|
| 380 |
@app.post("/find_similar")
|
| 381 |
-
async def find_similar(data: dict, runtime: str = "normal", top_k: int = 5):
|
| 382 |
"""
|
| 383 |
-
Calculate the similarity between the
|
| 384 |
"""
|
| 385 |
from sentence_transformers import SentenceTransformer
|
| 386 |
from sentence_transformers import util
|
| 387 |
|
| 388 |
-
titles = [book["title"] for book in data
|
| 389 |
-
descriptions = [book["description"] for book in data
|
| 390 |
-
publishers = [book["publisher"] for book in data
|
| 391 |
|
| 392 |
# Combine title, description, and publisher into a single string
|
| 393 |
combined_data = [
|
|
@@ -402,6 +403,7 @@ async def find_similar(data: dict, runtime: str = "normal", top_k: int = 5):
|
|
| 402 |
top_k = len(combined_data) if top_k > len(combined_data) else top_k
|
| 403 |
|
| 404 |
similar_books = []
|
|
|
|
| 405 |
for i in range(len(combined_data)):
|
| 406 |
# Get the embedding for the ith book
|
| 407 |
current_embedding = book_embeddings[i]
|
|
@@ -418,9 +420,7 @@ async def find_similar(data: dict, runtime: str = "normal", top_k: int = 5):
|
|
| 418 |
}
|
| 419 |
)
|
| 420 |
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
return response
|
| 424 |
|
| 425 |
|
| 426 |
@app.post("/summarize")
|
|
@@ -451,135 +451,9 @@ async def summarize(descriptions: list, runtime="normal"):
|
|
| 451 |
# Summarize the descriptions
|
| 452 |
summaries = [
|
| 453 |
summarizer_pipe(description)
|
| 454 |
-
if (len(description) > 0 and description != "Null")
|
| 455 |
else [{"summary_text": "No summary text is available."}]
|
| 456 |
for description in descriptions
|
| 457 |
]
|
| 458 |
|
| 459 |
return summaries
|
| 460 |
-
|
| 461 |
-
def classify(combined_data, runtime="normal"):
|
| 462 |
-
"""
|
| 463 |
-
Create classifier pipeline and return the results.
|
| 464 |
-
"""
|
| 465 |
-
from transformers import (
|
| 466 |
-
AutoTokenizer,
|
| 467 |
-
AutoModelForSequenceClassification,
|
| 468 |
-
pipeline,
|
| 469 |
-
)
|
| 470 |
-
from optimum.onnxruntime import ORTModelForSequenceClassification
|
| 471 |
-
from optimum.bettertransformer import BetterTransformer
|
| 472 |
-
|
| 473 |
-
if runtime == "normal":
|
| 474 |
-
# Define the zero-shot classifier
|
| 475 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
| 476 |
-
"sileod/deberta-v3-base-tasksource-nli"
|
| 477 |
-
)
|
| 478 |
-
model = AutoModelForSequenceClassification.from_pretrained(
|
| 479 |
-
"sileod/deberta-v3-base-tasksource-nli"
|
| 480 |
-
)
|
| 481 |
-
elif runtime == "onnxruntime":
|
| 482 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
| 483 |
-
"optimum/distilbert-base-uncased-mnli"
|
| 484 |
-
)
|
| 485 |
-
model = ORTModelForSequenceClassification.from_pretrained(
|
| 486 |
-
"optimum/distilbert-base-uncased-mnli"
|
| 487 |
-
)
|
| 488 |
-
|
| 489 |
-
classifier_pipe = pipeline(
|
| 490 |
-
"zero-shot-classification",
|
| 491 |
-
model=model,
|
| 492 |
-
tokenizer=tokenizer,
|
| 493 |
-
hypothesis_template="This book is {}.",
|
| 494 |
-
batch_size=1,
|
| 495 |
-
device=-1,
|
| 496 |
-
multi_label=False,
|
| 497 |
-
)
|
| 498 |
-
|
| 499 |
-
# Define the candidate labels
|
| 500 |
-
level = [
|
| 501 |
-
"Introductory",
|
| 502 |
-
"Advanced",
|
| 503 |
-
]
|
| 504 |
-
|
| 505 |
-
audience = ["Academic", "Not Academic", "Manual"]
|
| 506 |
-
|
| 507 |
-
classes = [
|
| 508 |
-
{
|
| 509 |
-
"audience": classifier_pipe(doc, audience),
|
| 510 |
-
"level": classifier_pipe(doc, level),
|
| 511 |
-
}
|
| 512 |
-
for doc in combined_data
|
| 513 |
-
]
|
| 514 |
-
|
| 515 |
-
return classes
|
| 516 |
-
|
| 517 |
-
# If true then run the similarity, summarize, and classify functions
|
| 518 |
-
if classification:
|
| 519 |
-
classes = classify(combined_data, runtime="normal")
|
| 520 |
-
else:
|
| 521 |
-
classes = [
|
| 522 |
-
{"labels": ["No labels available."], "scores": [0]}
|
| 523 |
-
for i in range(len(combined_data))
|
| 524 |
-
]
|
| 525 |
-
|
| 526 |
-
# Calculate the elapsed time between the third and fourth checkpoints
|
| 527 |
-
fourth_checkpoint = time.time()
|
| 528 |
-
classification_time = int(fourth_checkpoint - third_checkpoint)
|
| 529 |
-
|
| 530 |
-
if summarization:
|
| 531 |
-
summaries = summarize(descriptions, runtime="normal")
|
| 532 |
-
else:
|
| 533 |
-
summaries = [
|
| 534 |
-
[{"summary_text": description}]
|
| 535 |
-
if (len(description) > 0)
|
| 536 |
-
else [{"summary_text": "No summary text is available."}]
|
| 537 |
-
for description in descriptions
|
| 538 |
-
]
|
| 539 |
-
|
| 540 |
-
# Calculate the elapsed time between the fourth and fifth checkpoints
|
| 541 |
-
fifth_checkpoint = time.time()
|
| 542 |
-
summarization_time = int(fifth_checkpoint - fourth_checkpoint)
|
| 543 |
-
|
| 544 |
-
if similarity:
|
| 545 |
-
similar_books = find_similar(combined_data)
|
| 546 |
-
else:
|
| 547 |
-
similar_books = [
|
| 548 |
-
{"sorted_by_similarity": ["No similar books available."]}
|
| 549 |
-
for i in range(len(combined_data))
|
| 550 |
-
]
|
| 551 |
-
|
| 552 |
-
# Calculate the elapsed time between the fifth and sixth checkpoints
|
| 553 |
-
sixth_checkpoint = time.time()
|
| 554 |
-
similarity_time = int(sixth_checkpoint - fifth_checkpoint)
|
| 555 |
-
|
| 556 |
-
# Calculate the total elapsed time
|
| 557 |
-
end_time = time.time()
|
| 558 |
-
runtime = f"{end_time - start_time:.2f} seconds"
|
| 559 |
-
|
| 560 |
-
# Create a list of dictionaries to store the results
|
| 561 |
-
results = []
|
| 562 |
-
for i in range(len(titles)):
|
| 563 |
-
results.append(
|
| 564 |
-
{
|
| 565 |
-
"id": i,
|
| 566 |
-
"title": titles[i],
|
| 567 |
-
"author": authors[i],
|
| 568 |
-
"publisher": publishers[i],
|
| 569 |
-
"image_link": images[i],
|
| 570 |
-
"audience": classes[i]["audience"]["labels"][0],
|
| 571 |
-
"audience_confidence": classes[i]["audience"]["scores"][0],
|
| 572 |
-
"level": classes[i]["level"]["labels"][0],
|
| 573 |
-
"level_confidence": classes[i]["level"]["scores"][0],
|
| 574 |
-
"summary": summaries[i][0]["summary_text"],
|
| 575 |
-
"similar_books": similar_books[i]["sorted_by_similarity"],
|
| 576 |
-
"runtime": {
|
| 577 |
-
"total": runtime,
|
| 578 |
-
"classification": classification_time,
|
| 579 |
-
"summarization": summarization_time,
|
| 580 |
-
"similarity": similarity_time,
|
| 581 |
-
},
|
| 582 |
-
}
|
| 583 |
-
)
|
| 584 |
-
|
| 585 |
-
return results
|
|
|
|
| 293 |
|
| 294 |
results = [
|
| 295 |
{
|
| 296 |
+
"id": i,
|
| 297 |
"title": title,
|
| 298 |
"author": author,
|
| 299 |
"publisher": publisher,
|
| 300 |
"description": description,
|
| 301 |
+
"image_link": image,
|
| 302 |
}
|
| 303 |
+
for (i, [title, author, publisher, description, image]) in enumerate(
|
| 304 |
+
zip(titles, authors, publishers, descriptions, images)
|
| 305 |
)
|
| 306 |
]
|
| 307 |
|
| 308 |
+
return results
|
|
|
|
|
|
|
| 309 |
|
| 310 |
|
| 311 |
@app.post("/classify")
|
| 312 |
+
async def classify(data: list, runtime: str = "normal"):
|
| 313 |
"""
|
| 314 |
Create classifier pipeline and return the results.
|
| 315 |
"""
|
| 316 |
+
titles = [book["title"] for book in data]
|
| 317 |
+
descriptions = [book["description"] for book in data]
|
| 318 |
+
publishers = [book["publisher"] for book in data]
|
| 319 |
|
| 320 |
# Combine title, description, and publisher into a single string
|
| 321 |
combined_data = [
|
|
|
|
| 368 |
classes = [
|
| 369 |
{
|
| 370 |
"audience": classifier_pipe(doc, audience)["labels"][0],
|
| 371 |
+
"audience_confidence": classifier_pipe(doc, audience)["scores"][0],
|
| 372 |
+
"level": classifier_pipe(doc, level)["labels"][0],
|
| 373 |
+
"level_confidence": classifier_pipe(doc, level)["scores"][0],
|
| 374 |
}
|
| 375 |
for doc in combined_data
|
| 376 |
]
|
|
|
|
| 379 |
|
| 380 |
|
| 381 |
@app.post("/find_similar")
|
| 382 |
+
async def find_similar(data: list, top_k: int = 5):
|
| 383 |
"""
|
| 384 |
+
Calculate the similarity between the selected book and the corpus. Return the top_k results.
|
| 385 |
"""
|
| 386 |
from sentence_transformers import SentenceTransformer
|
| 387 |
from sentence_transformers import util
|
| 388 |
|
| 389 |
+
titles = [book["title"] for book in data]
|
| 390 |
+
descriptions = [book["description"] for book in data]
|
| 391 |
+
publishers = [book["publisher"] for book in data]
|
| 392 |
|
| 393 |
# Combine title, description, and publisher into a single string
|
| 394 |
combined_data = [
|
|
|
|
| 403 |
top_k = len(combined_data) if top_k > len(combined_data) else top_k
|
| 404 |
|
| 405 |
similar_books = []
|
| 406 |
+
|
| 407 |
for i in range(len(combined_data)):
|
| 408 |
# Get the embedding for the ith book
|
| 409 |
current_embedding = book_embeddings[i]
|
|
|
|
| 420 |
}
|
| 421 |
)
|
| 422 |
|
| 423 |
+
return similar_books
|
|
|
|
|
|
|
| 424 |
|
| 425 |
|
| 426 |
@app.post("/summarize")
|
|
|
|
| 451 |
# Summarize the descriptions
|
| 452 |
summaries = [
|
| 453 |
summarizer_pipe(description)
|
| 454 |
+
if (len(description) > 0 and description != "Null" and description != None)
|
| 455 |
else [{"summary_text": "No summary text is available."}]
|
| 456 |
for description in descriptions
|
| 457 |
]
|
| 458 |
|
| 459 |
return summaries
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|