# Spaces: Sleeping
from fastapi import FastAPI

# Application object for this service. Passing docs_url="/" asks FastAPI to
# serve its interactive API documentation at the root path.
app = FastAPI(docs_url="/")
# Text stored for any field a source does not provide.
_PLACEHOLDER_TEXT = "Null"
# Cover image used when a result has no thumbnail of its own.
_PLACEHOLDER_COVER = (
    "https://bookstoreromanceday.org/wp-content/uploads/2020/08/"
    "book-cover-placeholder.png"
)
# Labels offered to the zero-shot classifier.
_CANDIDATE_LABELS = [
    "Introductory",
    "Advanced",
    "Academic",
    "Not Academic",
    "Manual",
]
# Models built on first use and kept for later calls, so that a request does
# not have to reload them from disk every time.
_MODEL_CACHE = {}


def _dig(container, *path, default=_PLACEHOLDER_TEXT):
    """Follow *path* (keys or indices) through nested containers.

    Returns *default* when any step is missing, out of range, not
    subscriptable, or when the value found is ``None``.
    """
    current = container
    for step in path:
        try:
            current = current[step]
        except (KeyError, IndexError, TypeError):
            return default
    return default if current is None else current


def _book(title, author, publisher, description, image=_PLACEHOLDER_COVER):
    """Bundle the fields collected for one result into a single record."""
    return {
        "title": title,
        "author": author,
        "publisher": publisher,
        "description": description,
        "image": image,
    }


def _google_books(query):
    """Fetch up to 30 matching volumes from the Google Books API."""
    import requests

    response = requests.get(
        "https://www.googleapis.com/books/v1/volumes",
        params={"q": str(query), "printType": "books", "maxResults": 30},
        timeout=30,  # never hang the caller on an unresponsive server
    )
    # Surface HTTP failures as such instead of as a later KeyError.
    response.raise_for_status()

    books = []
    # "items" is absent from the payload when nothing matched.
    for item in response.json().get("items", []):
        info = _dig(item, "volumeInfo", default={})
        title = _dig(info, "title")
        subtitle = _dig(info, "subtitle", default=None)
        if subtitle:
            title = f"{title}: {subtitle}"
        books.append(
            _book(
                title,
                _dig(info, "authors", 0),
                _dig(info, "publisher"),
                _dig(info, "description"),
                _dig(info, "imageLinks", "thumbnail", default=_PLACEHOLDER_COVER),
            )
        )
    return books


def _openalex_works(query):
    """Fetch the first page (at most 10 works) of OpenAlex search results."""
    import pyalex
    from pyalex import Works

    # Contact address passed to OpenAlex through the pyalex configuration.
    pyalex.config.email = "ber2mir@gmail.com"

    pages = Works().search(str(query)).paginate(per_page=10, n_max=10)
    # Only the first page is used; fall back to no works if there is none.
    first_page = next(iter(pages), [])
    return [
        _book(
            _dig(work, "title"),
            _dig(work, "authorships", 0, "author", "display_name"),
            _dig(work, "host_venue", "publisher"),
            _dig(work, "abstract"),
        )
        for work in first_page
    ]


def _parse_recommendations(reply):
    """Extract book records from the free-text ChatGPT *reply*.

    Every line containing ``title: ..., author: ..., publisher: ...,
    summary: ...`` (in any letter case) yields one record. Lines that do not
    follow that shape, such as introductions or blank lines, are skipped.
    Commas inside a field are kept because each field runs up to the next
    field label rather than to the first comma.
    """
    import re

    pattern = re.compile(
        r"title:\s*(?P<title>.+?),\s*author:\s*(?P<author>.+?),\s*"
        r"publisher:\s*(?P<publisher>.+?),\s*summary:\s*(?P<summary>.+)",
        re.IGNORECASE,
    )
    books = []
    for line in reply.splitlines():
        match = pattern.search(line)
        if match is None:
            continue
        books.append(
            _book(
                match["title"].strip(),
                match["author"].strip(),
                match["publisher"].strip(),
                match["summary"].strip(),
            )
        )
    return books


def _chatgpt_recommendations(query):
    """Ask ChatGPT for 10 book recommendations about *query*."""
    import os

    import openai

    # The API key must come from the environment, never from source code.
    # When the variable is unset, whatever the openai client is already
    # configured with is left untouched.
    api_key = os.environ.get("OPENAI_API_KEY")
    if api_key:
        openai.api_key = api_key

    # NOTE(review): ChatCompletion is the interface this file was written
    # against; confirm it matches the installed openai version.
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a librarian. You are helping a patron find a book.",
            },
            {
                "role": "user",
                "content": f"Recommend me 10 books about {query}. Your response should be like: 'title: <title>, author: <author>, publisher: <publisher>, summary: <summary>'",
            },
        ],
    )
    return _parse_recommendations(completion["choices"][0]["message"]["content"])


def _load_pipelines():
    """Return ``(summarizer, zero_shot_classifier)``, building them once."""
    pipelines = _MODEL_CACHE.get("pipelines")
    if pipelines is None:
        from transformers import (
            AutoModelForSeq2SeqLM,
            AutoModelForSequenceClassification,
            AutoTokenizer,
            pipeline,
        )

        summarizer_name = "lidiya/bart-base-samsum"
        classifier_name = "sileod/deberta-v3-base-tasksource-nli"
        summarizer = pipeline(
            "summarization",
            model=AutoModelForSeq2SeqLM.from_pretrained(summarizer_name),
            tokenizer=AutoTokenizer.from_pretrained(summarizer_name),
            batch_size=64,
        )
        classifier = pipeline(
            "zero-shot-classification",
            model=AutoModelForSequenceClassification.from_pretrained(classifier_name),
            tokenizer=AutoTokenizer.from_pretrained(classifier_name),
            batch_size=64,
            hypothesis_template="This book is {}.",
            multi_label=True,
        )
        pipelines = _MODEL_CACHE["pipelines"] = (summarizer, classifier)
    return pipelines


def _similar_books(texts):
    """Rank, for every text, the other texts by embedding similarity.

    Returns one list per input text holding the ``semantic_search`` hits
    (dicts with ``corpus_id`` and ``score``) among the 20 nearest texts,
    with the text's own entry removed.
    """
    from sentence_transformers import SentenceTransformer, util

    embedder = _MODEL_CACHE.get("embedder")
    if embedder is None:
        embedder = _MODEL_CACHE["embedder"] = SentenceTransformer("all-MiniLM-L6-v2")

    embeddings = embedder.encode(texts, convert_to_tensor=True)
    # One call scores every text against every other text.
    ranked = util.semantic_search(embeddings, embeddings, top_k=20)
    return [
        [hit for hit in hits if hit["corpus_id"] != index]
        for index, hits in enumerate(ranked)
    ]


def search(query, similarity=False):
    """Search several sources for books about *query* and annotate the hits.

    Results are gathered, in this order, from the Google Books API, the
    OpenAlex works index and a ChatGPT recommendation prompt. Each result is
    then summarised and labelled by a zero-shot classifier.

    Args:
        query: Search terms; converted with ``str()`` before being sent.
        similarity: When true, also rank every result against all others by
            sentence-embedding similarity.

    Returns:
        A list with one dict per result, holding the keys ``id``, ``title``,
        ``author``, ``publisher``, ``image_link``, ``labels``,
        ``label_confidences``, ``summary``, ``similar_books`` and
        ``runtime``. ``labels`` and ``label_confidences`` are the first two
        entries returned by the classifier. ``similar_books`` is an empty
        list unless *similarity* is true. Fields a source did not supply
        hold the string ``"Null"``. The list is empty when no source
        returned anything.

    Raises:
        requests.HTTPError: If the Google Books request fails.

    The OpenAI API key is read from the ``OPENAI_API_KEY`` environment
    variable.
    """
    import time

    started = time.time()

    books = (
        _google_books(query)
        + _openalex_works(query)
        + _chatgpt_recommendations(query)
    )
    if not books:
        # Nothing to annotate, so skip loading the models.
        return []

    summarizer, classifier = _load_pipelines()

    # Text handed to the classifier and the embedding model for each result.
    texts = [
        f"{book['title']} {book['description']} {book['publisher']}"
        for book in books
    ]

    summaries = []
    for book in books:
        description = book["description"]
        if not description or description == _PLACEHOLDER_TEXT:
            # No real description: do not summarise the placeholder itself.
            summaries.append(_PLACEHOLDER_TEXT)
        else:
            # Only the first 1024 characters are summarised.
            summaries.append(summarizer(description[0:1024])[0]["summary_text"])

    classifications = [classifier(text, _CANDIDATE_LABELS) for text in texts]

    # As before, the reported runtime excludes the optional similarity step.
    runtime = f"{time.time() - started:.2f} seconds"

    if similarity:
        neighbours = _similar_books(texts)
    else:
        neighbours = [[] for _ in books]

    return [
        {
            "id": index,
            "title": book["title"],
            "author": book["author"],
            "publisher": book["publisher"],
            "image_link": book["image"],
            "labels": labelled["labels"][0:2],
            "label_confidences": labelled["scores"][0:2],
            "summary": summary,
            "similar_books": similar,
            "runtime": runtime,
        }
        for index, (book, labelled, summary, similar) in enumerate(
            zip(books, classifications, summaries, neighbours)
        )
    ]