Spaces:
Running
Running
File size: 6,254 Bytes
1462675 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 | import pandas as pd
import warnings
import re
warnings.filterwarnings("ignore")
from fuzzywuzzy import process
def standardize_author_names(df):
# Create a list of unique author names
unique_authors = df['Author'].unique()
# Create a dictionary to map original names to standard names
author_map = {}
for author in unique_authors:
match = process.extractOne(author, author_map.keys())
if match and match[1] > 90: # Adjust threshold as needed
author_map[author] = match[0]
else:
author_map[author] = author
# Apply the mapping to the dataframe
df['Author'] = df['Author'].map(author_map)
return df
def is_contained(subtitle, fulltitle):
return subtitle in fulltitle
def consolidate_titles_based_on_containment(df):
# Get unique titles
titles = df['Title'].unique()
# Create a dictionary to map original titles to consolidated titles
title_map = {}
for title in titles:
# Check if the title is already in the map
if title not in title_map:
# Find all titles that this title is contained in
for candidate in titles:
if "All Stars Bonus" not in candidate and title != candidate and is_contained(title, candidate):
title_map[title] = candidate
break
else:
title_map[title] = title
# Apply the mapping to the dataframe
df['Title'] = df['Title'].map(title_map)
return df
def generate_report(royalties_path, payouts_path):
sales_df = pd.read_excel(royalties_path, sheet_name="Total Earnings", header=1)
payments_df = pd.read_excel(payouts_path, sheet_name="Payments")
sales_df_trim = sales_df[["Title", "Author", "Payout Plan", "Currency", "Earnings"]]
sales_df_trim['Title'] = sales_df_trim.apply(
lambda row: f"{row['Title']} ({row['Payout Plan']})" if row['Payout Plan'] == "All Stars Bonus" else row['Title'],
axis=1)
royalties_norm_text = consolidate_titles_based_on_containment(standardize_author_names(sales_df_trim))
royalties_currencies = royalties_norm_text.groupby('Currency', as_index=False).agg({'Earnings':'sum'})
payouts_df= payments_df[["Currency","Net Earnings","FX Rate","Payout Amount"]].dropna(subset=['Payout Amount'])
payouts_df = payouts_df.groupby('Currency', as_index=False).agg({'Net Earnings':'sum', 'FX Rate':'mean', 'Payout Amount':'sum'})
payouts_df['FX Rate'] = payouts_df['FX Rate'].fillna(1)
merged_df = pd.merge(royalties_currencies, payouts_df, on='Currency', how='left')
merged_df['Earnings_conversion'] = merged_df['Earnings'] * merged_df['FX Rate']
merged_df['Division_Quotient'] = merged_df['Earnings_conversion']/merged_df['Payout Amount']
merged_df['Adjusted_FX_rate'] = (merged_df['FX Rate']/merged_df['Division_Quotient'])
final_royalties = pd.merge(royalties_norm_text,(merged_df[['Currency', 'Adjusted_FX_rate']]), on='Currency', how='left')
final_royalties['Adjusted_Earnings'] = final_royalties['Earnings'] * final_royalties['Adjusted_FX_rate']
return final_royalties
def print_report(royalties_path, payments_path):
final_royalties = generate_report(royalties_path, payments_path)
grouped = final_royalties.groupby(['Author', 'Title'], as_index=False)['Adjusted_Earnings'].sum()
# Step 5: Generate the report
print("Total: ", final_royalties["Adjusted_Earnings"].sum())
print("-----------------------\n")
for author, group in grouped.groupby('Author'):
total_earnings = group['Adjusted_Earnings'].sum()
print(f"Author: {author}")
print(f"Total Earnings: ${total_earnings:.2f}")
# Loop through the books and print each title with earnings
for _, row in group.iterrows():
print(f" {row['Title']}: ${row['Adjusted_Earnings']:.2f}")
print("\n")
def generate_report_for_insertion(royalties_path, payouts_path):
"""Generates a DataFrame of individual royalty entries for database insertion."""
# Extract month and year from the royalties_path filename
match = re.search(r"(\d{4})-(\d{2})", royalties_path)
if match:
year = int(match.group(1))
month = int(match.group(2))
else:
# Handle cases where the pattern doesn't match (e.g., raise an error or use defaults)
raise ValueError("Could not extract year and month from royalties_path filename.")
sales_df = pd.read_excel(royalties_path, sheet_name="Total Earnings", header=1)
payments_df = pd.read_excel(payouts_path, sheet_name="Payments")
sales_df_trim = sales_df[["Title", "Author", "Payout Plan", "Currency", "Earnings"]]
sales_df_trim['Title'] = sales_df_trim.apply(
lambda row: f"{row['Title']} ({row['Payout Plan']})" if row['Payout Plan'] == "All Stars Bonus" else row['Title'],
axis=1
)
royalties_norm_text = consolidate_titles_based_on_containment(standardize_author_names(sales_df_trim))
royalties_currencies = royalties_norm_text.groupby('Currency', as_index=False).agg({'Earnings': 'sum'})
payouts_df = payments_df[["Currency", "Net Earnings", "FX Rate", "Payout Amount"]].dropna(subset=['Payout Amount'])
payouts_df = payouts_df.groupby('Currency', as_index=False).agg({'Net Earnings': 'sum', 'FX Rate': 'mean', 'Payout Amount': 'sum'})
payouts_df['FX Rate'] = payouts_df['FX Rate'].fillna(1)
merged_df = pd.merge(royalties_currencies, payouts_df, on='Currency', how='left')
merged_df['Earnings_conversion'] = merged_df['Earnings'] * merged_df['FX Rate']
merged_df['Division_Quotient'] = merged_df['Earnings_conversion'] / merged_df['Payout Amount']
merged_df['Adjusted_FX_rate'] = (merged_df['FX Rate'] / merged_df['Division_Quotient'])
final_royalties = pd.merge(royalties_norm_text, (merged_df[['Currency', 'Adjusted_FX_rate']]), on='Currency', how='left')
final_royalties['Adjusted_Earnings'] = final_royalties['Earnings'] * final_royalties['Adjusted_FX_rate']
# Add month and year columns
final_royalties['month'] = month
final_royalties['year'] = year
# Return the DataFrame
return final_royalties
|