Spaces:
Sleeping
Sleeping
| import traceback | |
| try: | |
| import pandas as pd | |
| path = 'data/training_data_telugu-hate.xlsx' | |
| print(f"Loading {path}...") | |
| df = pd.read_excel(path) | |
| print(f"Original shape: {df.shape}") | |
| # 1. Back up the original file just in case | |
| df.to_excel('data/training_data_telugu-hate_backup.xlsx', index=False) | |
| # Clean duplicates and nans | |
| df = df.dropna(subset=['Comments', 'Label']) | |
| df['Comments'] = df['Comments'].astype(str).str.strip() | |
| df['Label'] = df['Label'].astype(str).str.strip().str.lower() | |
| df = df[df['Label'].isin(['hate', 'non-hate'])] | |
| df = df.drop_duplicates(subset=['Comments'], keep='first') | |
| print(f"Shape after cleaning: {df.shape}") | |
| # New words | |
| toxic = [ | |
| "rey mental puku", "ni edava veshalu", "konda erri hook", "thu ni brathuku cheda", "panimashiva ra nuvu", | |
| "erri puku nayala", "nuvu oka pedda jaffa", "siggu ledu ra neeku", "pichi pulka gadu", "waste fellow ra nuvu", | |
| "dengay ra lathkor", "ni yamma kadupula koti", "adangi vedhava", "gudda balupu", "boku gadu vidu", | |
| "rey chetha na kodaka", "poramboku nayala", "ni mokam chudu elagundo", "chapri gadu lanjodka", "lavada lo panulu", | |
| "modda em kadu le", "pachi boothulu tidutha", "daridrudu", "tuppas gadu", "chavata chavata", | |
| "mental gadu ra vidu", "sannasi", "bewarse gadu", "ne bondha ra ne bondha", "rey puku", | |
| "vedava sannaasi", "guddalo em ledha", "ni amma", "ni abba", "rey lanjodoka", "addamina waste gadu", | |
| "rotta gadu", "faltu gadu", "picha light teesko ra puku", "lathkor gadu", "erri pusa", | |
| "bazar munda", "rey kojja nayala", "ni ayya ki cheppu", "solu gadu", "sollu cheppaku nayala", | |
| "arey howle", "bhadcow gadu", "puka musko", "rey ni amma", "denga beta", | |
| "ni puku lo na modda", "erri guda", "nuvvu oka waste puku", "ni yabba", "dunnapothu nayala", | |
| "munda mokam", "sulli gadu", "arey erri", "pedda puku", "mental na kodaka", "lanja kodaka", | |
| "ni amma ranku", "chethana kodaka", "musali puku", "gudda chimputha", "ni amma ninnu kaninda", | |
| "rey neeku guddalo dammu leda", "ni mokam meda umma", "chepaleni boothulu", "thu ni bathuku", "kukka brathuku", | |
| "ni bathuku bus stand", "picchi puku", "hook gani laga unnav", "gadida kodaka", | |
| "donga puku", "munda edava", "musko ra jaffa", "bocchu gadu", "ni ayya puku", "naa modda guduvu", | |
| "lavadalo comments", "item gani laga unnav", "loffer gadu", "ni face ki dippa okate takkuva", "pakodi gadu", | |
| "mental hospital ki ellu", "rey pichi guda", "bithiri", "buffoon gadu", "420 gadu", | |
| "ne kamma", "ni bondha pettu", "kothi na kodaka", "labor na kodaka", "signal daggara adukko", "Footpath gadu" | |
| ] | |
| safe = [ | |
| "super undi bro", "congrats macha", "all the best ra", "chala bagundi", "kekaa", | |
| "thanks anna", "subram ga undi", "awesome work", "good job keep it up", "nice explanation", | |
| "this is very helpful", "mee video lu ante chala ishtam", "first comment ra", "video super", "nice editing", | |
| "super ga chepparu", "meeru inka goppavallu avvali", "waiting for next part", "good morning everyone", "have a nice day", | |
| "really nice bro", "bhale cheppav", "good point", "manchi maata", "exactly macha", | |
| "agreed", "well said anna", "proud of you", "jai hind", "super hit", | |
| "very informative", "hats off to you", "good lesson learned", "superb acting", "next level", | |
| "mind blowing performance", "keep soaring high", "bagundi", "baga chesaaru", "congratulations brother", | |
| "so beautiful", "very nice song", "loved this", "manchi content idhi", "thank you so much", | |
| "keep going", "amazing as always", "very true words", "good luck", "edo oka roju sadhistavu", | |
| "meeru goppa anna", "salute anna", "inspiring video", "bhale tisaaru", "cinematography peaks", | |
| "this made my day", "chala happy ga undi", "super star nvvu", "naaku idi chala use aindi", "respect", | |
| "god bless you", "super anna", "keep doing videos", "nenu subscribe chesa", "like kottandi", | |
| "miku manchi jargali", "great progress", "awesome efforts", "very nice tutorial", "fantastic", | |
| "proud moment", "excellent work", "bhale undi kada", "super ga navvu", "nice smile", | |
| "thanks for your support", "manchi advice", "helpful tips", "very clear", "super bro super", | |
| "love from hyd", "amazing talent", "keep rocking", "gret job", "so soothing", | |
| "wonderful video", "sweet comments", "very kind of you", "thank you akka", "wow super", | |
| "masterpiece", "great info", "good stuff", "so positive", "happy for you", "best wishes", | |
| "take care", "always supporting you", "superb explanation", "nice tutorial bro", "you are the best" | |
| ] | |
| # Map to new rows | |
| new_rows = [] | |
| for t in toxic: | |
| new_rows.append({'S.No': 'AUGMENTED_HATE', 'Comments': t, 'Label': 'hate'}) | |
| for s in safe: | |
| new_rows.append({'S.No': 'AUGMENTED_SAFE', 'Comments': s, 'Label': 'non-hate'}) | |
| augment_df = pd.DataFrame(new_rows) | |
| final_df = pd.concat([df, augment_df], ignore_index=True) | |
| # Overwrite | |
| final_df.to_excel(path, index=False) | |
| print(f"Final shape: {final_df.shape}") | |
| print("✅ Augmentation complete! Successfully wrote to Excel.") | |
| except Exception as e: | |
| with open('error_log.txt', 'w') as f: | |
| f.write(traceback.format_exc()) | |
| print("Script failed. See error_log.txt") | |