Spaces:
Sleeping
Sleeping
| import os | |
| import hashlib | |
| import pandas as pd | |
| from openai import OpenAI | |
| import gradio as gr | |
| input_file = "./data/sample_gpg_data.jsonl" | |
| user_df = pd.read_json(input_file, lines=True) | |
| user_ids = user_df["user_id"].unique().tolist() | |
| client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY')) | |
| # Simple in-memory cache | |
| guidance_cache = {} | |
| profile_cache = {} | |
| def hash_titles(titles): | |
| joined = "\n".join(sorted(titles)) | |
| return hashlib.md5(joined.encode("utf-8")).hexdigest() | |
| def get_books(user_id): | |
| if user_id is None: | |
| return "Please select a user.", pd.DataFrame(), "" | |
| user_info = user_df.loc[user_df["user_id"] == user_id] | |
| print(user_info) | |
| books_list = user_df.loc[user_df["user_id"] == user_id, "purchased_books"].values | |
| if len(books_list) == 0: | |
| return f"No books found for {user_id}.", pd.DataFrame(), "" | |
| books = books_list[0] | |
| df = pd.DataFrame(books) | |
| df = df[['title', 'author', 'categories']].rename(columns={'title': 'Title', 'author': 'Author', 'categories': 'Category'}) | |
| books_info = generate_books(books_list) | |
| titles = [book["title"] for book in books if "title" in book] | |
| cache_key = hash_titles(titles) | |
| if cache_key in guidance_cache: | |
| guidance_response = guidance_cache[cache_key] | |
| profile_response = profile_cache[cache_key] | |
| print("✅ Using cached response") | |
| else: | |
| print("🧠 Calling OpenAI API") | |
| guidance_prompt_str = guidance_prompt(books_info) | |
| guidance_response = client.chat.completions.create( | |
| model="gpt-3.5-turbo", | |
| messages=[{"role": "user", "content": guidance_prompt_str}], | |
| temperature=0.3, | |
| max_tokens=150 | |
| ).choices[0].message.content.strip() | |
| guidance_cache[cache_key] = guidance_response | |
| profile_response = client.chat.completions.create( | |
| model="gpt-3.5-turbo", | |
| messages=[ | |
| {"role": "user", "content": profile_prompt(books_info, guidance_response)} | |
| ], | |
| temperature=0.3, | |
| max_tokens=150 | |
| ).choices[0].message.content.strip() | |
| profile_cache[cache_key] = profile_response | |
| candidates_options = list(user_info.get("candidate_options", [])) | |
| rec_prompt = build_recommendation_prompt(profile_response, candidates_options) | |
| choice = extract_choice(rec_prompt) | |
| predicted_book = candidates_options[choice-1] if choice and 1 <= choice <= len(candidates_options) else None | |
| target_book = user_info.get("target_asin", '') | |
| print("target_book:", target_book) | |
| return f"{user_id}", df, guidance_response, profile_response, rec_prompt, pd.DataFrame(candidates_options[0]), target_book.values, predicted_book[0]['asin'] | |
| def extract_choice(response_text): | |
| for token in response_text.split(): | |
| if token.strip("[]").isdigit(): | |
| return int(token.strip("[]")) | |
| return None | |
| def generate_books(books): | |
| book_combos = [] | |
| for book in books: | |
| categories = ', '.join(book[0]['categories']) | |
| book_combos.append(f"Title of the book is {book[0]['title']} and the category of the book is {categories}. Description of the book is {book[0]['description']}") | |
| return book_combos | |
| def guidance_prompt(titles): | |
| return f"""Here is a list of books a person has read:\n{chr(10).join("- " + t for t in titles)}\n\nWhat genres or themes do you notice across these books? Please list them concisely.""" | |
| def profile_prompt(titles, guidance): | |
| return f"""Here is a list of books a person has read:\n{chr(10).join("- " + t for t in titles)}\n\nBased on the following genres/themes: {guidance}\n\nSummarize this person's book preferences in one paragraph.""" | |
| def build_recommendation_prompt(profile, candidates): | |
| prompt = f"""A user has the following reading preference:\n"{profile}"\n\nHere are some books they might consider next:\n""" | |
| if len(candidates) == 1 and isinstance(candidates[0], list): | |
| candidates = candidates[0] | |
| for i, book in enumerate(candidates, start=1): | |
| prompt += f"[{i}] {book.get('title', 'Unknown Title')}\n" | |
| prompt += "\nWhich of these books best matches the user's preference? Respond ONLY with the number [1-4]." | |
| return prompt | |
| def get_books_theme(books): | |
| return | |
| with gr.Blocks() as demo: | |
| gr.Markdown("## Select User") | |
| user_dropdown = gr.Dropdown(choices=user_df["user_id"].tolist(), value=None, label="User ID") | |
| gr.Markdown("## Selected User") | |
| output_text = gr.Textbox(show_label=False) | |
| gr.Markdown("## Books read") | |
| output_table = gr.Dataframe(label="Books Read", interactive=False, show_label=False) | |
| gr.Markdown("## User Books Theme") | |
| output_theme = gr.Textbox(label="User Books Theme", lines=8, show_label=False) | |
| gr.Markdown("## User Profile") | |
| output_profile = gr.Textbox(label="User Profile", show_label=False, lines=6) | |
| output_rec_prompt = gr.Textbox(label="Recommendation Prompt", lines=8) | |
| output_candidate_options = gr.DataFrame(label="Candidate Books") | |
| output_target_id = gr.Textbox(label="Target Book") | |
| output_predicted_book = gr.Textbox(label="Predicted Book") | |
| user_dropdown.change(fn=get_books, inputs=user_dropdown, outputs=[output_text, output_table, output_theme, output_profile, output_rec_prompt, output_candidate_options, output_target_id, output_predicted_book]) | |
| if __name__ == "__main__": | |
| demo.launch() | |