TrashTopic / ctfidf_config.json
chalseee's picture
Fixed stop word bug
33e54a8 verified
{
"ctfidf_model": {
"bm25_weighting": false,
"reduce_frequent_words": true
},
"vectorizer_model": {
"params": {
"analyzer": "word",
"binary": false,
"decode_error": "strict",
"encoding": "utf-8",
"input": "content",
"lowercase": true,
"max_df": 1.0,
"max_features": null,
"min_df": 2,
"ngram_range": [
1,
1
],
"stop_words": [
"but",
"where",
"over",
"therein",
"everything",
"too",
"mine",
"re",
"whenever",
"hence",
"either",
"wherever",
"below",
"i",
"alone",
"should",
"no",
"nothing",
"made",
"with",
"one",
"almost",
"something",
"somewhere",
"ours",
"still",
"among",
"those",
"couldnt",
"nevertheless",
"once",
"go",
"why",
"what",
"by",
"for",
"when",
"whence",
"move",
"meanwhile",
"we",
"fifty",
"out",
"seem",
"moreover",
"elsewhere",
"con",
"without",
"myself",
"bottom",
"anyhow",
"whatever",
"will",
"have",
"amoungst",
"though",
"therefore",
"such",
"often",
"himself",
"whoever",
"to",
"latter",
"same",
"full",
"had",
"was",
"into",
"twelve",
"even",
"may",
"three",
"her",
"itself",
"behind",
"next",
"formerly",
"thereupon",
"rather",
"detail",
"others",
"via",
"whereupon",
"whether",
"ever",
"after",
"during",
"each",
"its",
"so",
"because",
"whereas",
"here",
"of",
"thus",
"nobody",
"sometimes",
"also",
"through",
"fire",
"hereupon",
"side",
"already",
"other",
"or",
"serious",
"everyone",
"became",
"several",
"a",
"co",
"back",
"anywhere",
"indeed",
"hereafter",
"interest",
"fifteen",
"all",
"he",
"somehow",
"besides",
"up",
"whom",
"everywhere",
"been",
"hers",
"call",
"are",
"latterly",
"there",
"against",
"per",
"beforehand",
"noone",
"our",
"toward",
"herself",
"seeming",
"describe",
"off",
"eight",
"anything",
"becoming",
"yourself",
"am",
"nine",
"would",
"please",
"be",
"much",
"well",
"under",
"ie",
"ltd",
"themselves",
"whereafter",
"twenty",
"very",
"sixty",
"front",
"more",
"cannot",
"whither",
"ten",
"not",
"own",
"someone",
"former",
"only",
"it",
"you",
"how",
"any",
"above",
"become",
"found",
"if",
"before",
"your",
"upon",
"get",
"cry",
"across",
"the",
"namely",
"yourselves",
"ourselves",
"thick",
"seems",
"none",
"inc",
"take",
"thin",
"which",
"whose",
"thence",
"forty",
"put",
"who",
"from",
"on",
"us",
"is",
"except",
"herein",
"fill",
"now",
"mostly",
"an",
"least",
"amongst",
"give",
"first",
"many",
"me",
"could",
"their",
"system",
"hasnt",
"thereby",
"together",
"de",
"can",
"anyone",
"four",
"name",
"beside",
"anyway",
"always",
"whole",
"else",
"this",
"un",
"yours",
"few",
"most",
"again",
"yet",
"eg",
"due",
"third",
"within",
"becomes",
"hereby",
"they",
"until",
"see",
"less",
"nowhere",
"cant",
"has",
"another",
"two",
"show",
"both",
"further",
"six",
"bill",
"keep",
"find",
"last",
"some",
"along",
"as",
"she",
"at",
"amount",
"about",
"never",
"down",
"wherein",
"sometime",
"throughout",
"thru",
"hundred",
"etc",
"might",
"whereby",
"enough",
"thereafter",
"part",
"my",
"and",
"in",
"eleven",
"empty",
"seemed",
"beyond",
"perhaps",
"around",
"mill",
"top",
"although",
"every",
"him",
"these",
"then",
"otherwise",
"onto",
"towards",
"done",
"that",
"than",
"his",
"were",
"sincere",
"them",
"five",
"must",
"neither",
"between",
"being",
"however",
"while",
"afterwards",
"do",
"since",
"nor",
"thank",
"person",
"persons",
"driver",
"yard",
"recycle",
"recycling",
"waste",
"bin",
"bins",
"can",
"cans",
"organization",
"location",
"person",
"date_timejuly",
"christmas",
"year",
"weve",
"ups",
"hes",
"shes",
"men",
"girl",
"man",
"grandson"
],
"strip_accents": null,
"token_pattern": "(?u)\\b\\w\\w+\\b",
"vocabulary": null
},
"vocab": {
"datetime": 204,
"comes": 142,
"theyre": 858,
"friendly": 336,
"young": 970,
"love": 496,
"garbage": 342,
"trucks": 888,
"gave": 343,
"nice": 538,
"honk": 406,
"excellent": 278,
"service": 755,
"called": 98,
"office": 554,
"spoke": 802,
"got": 359,
"exceptional": 279,
"following": 325,
"making": 501,
"sure": 832,
"request": 702,
"addressed": 20,
"folks": 322,
"like": 477,
"good": 358,
"job": 442,
"people": 581,
"collect": 136,
"trash": 883,
"met": 512,
"awesome": 63,
"company": 153,
"waited": 923,
"closed": 134,
"account": 11,
"sent": 753,
"times": 869,
"watch": 932,
"try": 891,
"guys": 371,
"missed": 515,
"pick": 588,
"problem": 620,
"grateful": 364,
"services": 757,
"thanks": 853,
"helpful": 393,
"explaining": 291,
"voice": 921,
"message": 511,
"regarding": 681,
"damaged": 202,
"dumpster": 243,
"commercial": 145,
"property": 633,
"appreciated": 39,
"professionalism": 624,
"conversation": 182,
"update": 905,
"canceled": 103,
"assured": 53,
"received": 669,
"showing": 772,
"showed": 771,
"money": 520,
"responded": 714,
"end": 263,
"date": 203,
"know": 454,
"new": 537,
"owners": 569,
"did": 218,
"dump": 240,
"expect": 284,
"pay": 577,
"unacceptable": 896,
"say": 742,
"far": 304,
"better": 73,
"wasnt": 931,
"really": 665,
"impressed": 420,
"satisfied": 740,
"positive": 609,
"pleasure": 601,
"speaking": 799,
"drivers": 235,
"kids": 448,
"favorite": 306,
"day": 206,
"wave": 933,
"great": 365,
"rep": 692,
"use": 914,
"pickup": 592,
"month": 522,
"going": 354,
"town": 880,
"scheduled": 746,
"street": 823,
"truck": 887,
"wouldnt": 965,
"picked": 589,
"neighbors": 536,
"appreciate": 38,
"quick": 647,
"scheduling": 747,
"does": 227,
"days": 207,
"just": 445,
"phone": 587,
"named": 527,
"wish": 949,
"cheerful": 127,
"customer": 199,
"outstanding": 567,
"easy": 247,
"make": 499,
"changes": 119,
"super": 828,
"reliable": 687,
"representative": 698,
"incredibly": 425,
"talk": 842,
"entire": 269,
"experience": 287,
"straightforward": 822,
"listened": 479,
"answered": 29,
"questions": 646,
"clearly": 133,
"provided": 635,
"truly": 890,
"friendliness": 335,
"app": 36,
"makes": 500,
"communicate": 148,
"remind": 689,
"long": 489,
"realized": 664,
"big": 74,
"forgotten": 328,
"container": 179,
"saw": 741,
"house": 409,
"took": 875,
"time": 867,
"dumped": 241,
"brought": 92,
"caring": 110,
"consistent": 174,
"things": 861,
"look": 490,
"collection": 138,
"thankful": 851,
"extra": 293,
"bags": 69,
"probably": 619,
"wonderful": 953,
"stand": 808,
"employee": 258,
"want": 928,
"pleasant": 599,
"response": 715,
"working": 960,
"easier": 246,
"change": 118,
"complicated": 164,
"patient": 576,
"kind": 449,
"break": 86,
"costs": 186,
"simple": 777,
"way": 936,
"understand": 897,
"amazing": 27,
"hiring": 400,
"taken": 839,
"care": 108,
"quickly": 648,
"im": 417,
"leaving": 468,
"star": 810,
"review": 718,
"human": 412,
"answering": 30,
"dont": 232,
"agent": 22,
"answers": 31,
"today": 872,
"assistance": 51,
"quality": 644,
"walked": 926,
"accounts": 12,
"work": 956,
"happy": 381,
"live": 482,
"community": 151,
"team": 846,
"wanted": 929,
"share": 765,
"ive": 441,
"twice": 895,
"knowledgeable": 457,
"line": 478,
"help": 391,
"drop": 238,
"went": 941,
"placed": 596,
"best": 72,
"stopped": 821,
"talked": 843,
"home": 404,
"complete": 161,
"upbeat": 904,
"smile": 787,
"attitude": 55,
"prompt": 628,
"polite": 605,
"courteous": 191,
"staff": 807,
"satisfaction": 738,
"glad": 352,
"informative": 428,
"informed": 429,
"professional": 623,
"email": 256,
"support": 830,
"agents": 23,
"accommodating": 10,
"route": 727,
"business": 94,
"helped": 392,
"smoothly": 789,
"meet": 508,
"needs": 532,
"picking": 590,
"moving": 525,
"state": 814,
"mean": 507,
"problems": 621,
"billing": 76,
"incredible": 424,
"family": 301,
"looks": 492,
"forward": 330,
"window": 948,
"hi": 397,
"moved": 524,
"unfortunately": 901,
"unprofessional": 902,
"url": 911,
"bit": 77,
"generally": 344,
"item": 438,
"wonder": 952,
"excuse": 282,
"sorry": 796,
"stuck": 825,
"alternative": 26,
"choose": 129,
"process": 622,
"transferring": 882,
"address": 19,
"thorough": 863,
"asset": 50,
"datetimes": 205,
"world": 962,
"committed": 147,
"issue": 436,
"husband": 414,
"frustrating": 338,
"calls": 100,
"absolutely": 8,
"real": 662,
"answer": 28,
"wait": 922,
"hang": 377,
"handling": 375,
"picks": 591,
"come": 141,
"ground": 367,
"dependable": 214,
"dropped": 239,
"sides": 775,
"flying": 321,
"left": 469,
"upright": 910,
"organizations": 565,
"noticed": 546,
"werent": 942,
"curb": 196,
"id": 415,
"asked": 48,
"wed": 939,
"getting": 347,
"car": 105,
"efficient": 251,
"resolution": 708,
"told": 874,
"taking": 841,
"customers": 200,
"example": 277,
"snow": 790,
"refuse": 680,
"maybe": 506,
"terrible": 850,
"deserve": 215,
"options": 563,
"monopoly": 521,
"boy": 85,
"lets": 471,
"waves": 935,
"toddler": 873,
"outside": 566,
"disposal": 226,
"places": 597,
"online": 558,
"yes": 969,
"away": 62,
"lucky": 497,
"payment": 579,
"having": 386,
"payments": 580,
"box": 83,
"machine": 498,
"said": 737,
"shed": 766,
"check": 124,
"happened": 379,
"tell": 849,
"kept": 447,
"talking": 844,
"hold": 402,
"came": 101,
"putting": 643,
"wrong": 968,
"blaming": 79,
"agreed": 24,
"choice": 128,
"appreciation": 40,
"containers": 180,
"explained": 290,
"ask": 47,
"receptacles": 673,
"instead": 432,
"saying": 743,
"fact": 297,
"hope": 407,
"representatives": 699,
"ensure": 266,
"delivering": 213,
"level": 472,
"management": 502,
"option": 562,
"available": 60,
"case": 113,
"cart": 111,
"gone": 356,
"set": 760,
"lady": 462,
"elderly": 255,
"mother": 523,
"park": 572,
"neighborhood": 535,
"whos": 945,
"seeing": 749,
"stars": 811,
"delivered": 212,
"exactly": 276,
"spot": 804,
"aware": 61,
"looking": 491,
"safety": 736,
"open": 559,
"recently": 672,
"issues": 437,
"pictures": 594,
"non": 541,
"throw": 865,
"thats": 854,
"replacement": 695,
"spent": 801,
"figuring": 312,
"exchange": 280,
"lived": 483,
"competent": 156,
"point": 603,
"solution": 791,
"special": 800,
"takes": 840,
"little": 481,
"driving": 237,
"different": 220,
"expectation": 285,
"shows": 774,
"kudos": 459,
"leave": 466,
"mess": 510,
"hard": 383,
"order": 564,
"beat": 70,
"cracked": 192,
"replaced": 694,
"previous": 613,
"area": 43,
"items": 439,
"immediately": 418,
"jumped": 444,
"remove": 691,
"huge": 411,
"dirty": 224,
"actually": 15,
"small": 786,
"kindness": 451,
"calling": 99,
"ensured": 267,
"recognition": 675,
"deserves": 216,
"supervisor": 829,
"place": 595,
"personable": 584,
"lives": 484,
"setting": 762,
"current": 197,
"broken": 91,
"carts": 112,
"setup": 763,
"swapped": 835,
"notch": 543,
"cancel": 102,
"impeccable": 419,
"highly": 398,
"recommend": 676,
"provider": 636,
"emptying": 261,
"emptied": 260,
"completely": 162,
"deal": 208,
"complaints": 160,
"happier": 380,
"telephone": 848,
"helpfulness": 394,
"skills": 785,
"stood": 820,
"professionals": 626,
"provide": 634,
"clear": 132,
"knowledge": 456,
"size": 782,
"upgrade": 908,
"dedication": 210,
"ensuring": 268,
"handled": 374,
"promptly": 629,
"accurately": 13,
"efficiency": 250,
"addressing": 21,
"felt": 311,
"believe": 71,
"relationships": 686,
"kindly": 450,
"upgraded": 909,
"residential": 706,
"forgot": 327,
"ran": 652,
"fun": 339,
"miss": 514,
"especially": 271,
"let": 470,
"bring": 89,
"couple": 189,
"houses": 410,
"cycle": 201,
"extremely": 294,
"able": 6,
"filled": 314,
"needed": 531,
"urley": 913,
"need": 530,
"possible": 610,
"rain": 650,
"culdesac": 195,
"autopay": 59,
"charging": 123,
"bag": 68,
"fee": 307,
"remember": 688,
"35": 3,
"dollars": 231,
"station": 816,
"tried": 885,
"using": 916,
"form": 329,
"think": 862,
"friend": 334,
"reached": 657,
"cancelled": 104,
"handle": 373,
"mile": 513,
"question": 645,
"nicest": 539,
"policy": 604,
"fantastic": 303,
"contact": 177,
"information": 427,
"stated": 815,
"receive": 668,
"helping": 395,
"situation": 781,
"ones": 557,
"week": 940,
"gonna": 357,
"late": 464,
"lol": 488,
"future": 340,
"efficiently": 252,
"automated": 57,
"respond": 713,
"updated": 906,
"proof": 630,
"right": 722,
"correct": 184,
"companies": 152,
"obviously": 548,
"tired": 871,
"requesting": 704,
"theirs": 855,
"doesnt": 228,
"guess": 368,
"women": 951,
"multiple": 526,
"occasions": 549,
"happen": 378,
"surprise": 834,
"thing": 860,
"blown": 80,
"attention": 54,
"follow": 323,
"practically": 612,
"matter": 505,
"offering": 552,
"reminder": 690,
"calendar": 97,
"lost": 494,
"life": 475,
"woman": 950,
"itll": 440,
"consistently": 175,
"pickups": 593,
"gotten": 360,
"past": 575,
"arrived": 45,
"knocked": 453,
"night": 540,
"employees": 259,
"public": 639,
"signed": 776,
"increase": 423,
"didnt": 219,
"apologized": 35,
"explanation": 292,
"horrible": 408,
"treat": 884,
"boxes": 84,
"checking": 126,
"tossed": 876,
"cardboard": 107,
"realize": 663,
"sprung": 806,
"action": 14,
"gentlemen": 345,
"literally": 480,
"number": 547,
"hear": 387,
"locate": 487,
"communicating": 149,
"collected": 137,
"effort": 253,
"shout": 769,
"coming": 143,
"dumping": 242,
"respectful": 712,
"steps": 819,
"fan": 302,
"fail": 298,
"homes": 405,
"failed": 299,
"till": 866,
"sad": 733,
"constantly": 176,
"giving": 351,
"definitely": 211,
"breath": 87,
"fresh": 333,
"air": 25,
"absolute": 7,
"reason": 666,
"anymore": 32,
"manner": 504,
"careful": 109,
"unreliable": 903,
"poor": 607,
"accident": 9,
"supposed": 831,
"regular": 684,
"grabbing": 362,
"businesses": 95,
"totally": 877,
"knew": 452,
"closing": 135,
"soon": 795,
"resolving": 711,
"note": 544,
"commend": 144,
"turn": 893,
"doing": 229,
"appropriately": 42,
"responsive": 716,
"inquired": 430,
"pricing": 616,
"schedule": 745,
"apparently": 37,
"half": 372,
"lids": 474,
"bother": 82,
"reading": 660,
"whats": 943,
"task": 845,
"read": 659,
"word": 955,
"computer": 165,
"isnt": 435,
"properly": 632,
"error": 270,
"waiting": 924,
"disappointing": 225,
"heck": 390,
"youre": 971,
"face": 296,
"guy": 370,
"road": 723,
"ethic": 272,
"navigating": 529,
"busy": 96,
"city": 130,
"streets": 824,
"rig": 721,
"skill": 784,
"sets": 761,
"apart": 34,
"personality": 586,
"tough": 879,
"energy": 265,
"seen": 750,
"lift": 476,
"forget": 326,
"commitment": 146,
"seriously": 754,
"rules": 730,
"true": 889,
"sense": 752,
"short": 767,
"rock": 725,
"used": 915,
"serviced": 756,
"price": 614,
"works": 961,
"gets": 346,
"100": 1,
"offered": 551,
"info": 426,
"dealing": 209,
"refreshing": 678,
"bright": 88,
"communication": 150,
"weather": 937,
"related": 685,
"recent": 671,
"assisted": 52,
"offer": 550,
"reps": 701,
"fabulous": 295,
"timely": 868,
"aside": 46,
"zero": 972,
"greatly": 366,
"bobbiejo": 81,
"resolved": 710,
"reaching": 658,
"missing": 517,
"workers": 959,
"holidays": 403,
"regards": 683,
"blame": 78,
"charge": 120,
"prices": 615,
"return": 717,
"shady": 764,
"feel": 308,
"bad": 67,
"quite": 649,
"ready": 661,
"duty": 244,
"difficult": 221,
"complain": 158,
"drive": 234,
"grab": 361,
"touch": 878,
"lazy": 465,
"rude": 729,
"trying": 892,
"charges": 122,
"mistake": 518,
"proper": 631,
"step": 818,
"started": 813,
"theyve": 859,
"confusing": 170,
"site": 780,
"correctly": 185,
"followed": 324,
"backed": 66,
"older": 556,
"heard": 388,
"course": 190,
"billed": 75,
"wants": 930,
"reach": 656,
"turned": 894,
"concerned": 166,
"frame": 331,
"spring": 805,
"despite": 217,
"puts": 642,
"position": 608,
"exchanged": 281,
"owner": 568,
"willing": 947,
"anytime": 33,
"goes": 353,
"worked": 957,
"send": 751,
"notice": 545,
"worker": 958,
"pride": 617,
"worst": 963,
"servicing": 758,
"driveway": 236,
"waved": 934,
"pull": 640,
"personal": 585,
"ladies": 461,
"managers": 503,
"solve": 793,
"solutions": 792,
"excuses": 283,
"continue": 181,
"guided": 369,
"particularly": 573,
"solving": 794,
"humor": 413,
"considering": 173,
"speak": 798,
"prior": 618,
"directly": 223,
"inclement": 421,
"conditions": 168,
"single": 779,
"round": 726,
"trip": 886,
"cautious": 116,
"playing": 598,
"local": 486,
"serving": 759,
"thankless": 852,
"row": 728,
"shown": 773,
"50": 5,
"run": 731,
"valued": 919,
"lack": 460,
"competition": 157,
"switch": 837,
"loose": 493,
"12": 2,
"ridiculous": 720,
"free": 332,
"idiot": 416,
"given": 349,
"license": 473,
"expensive": 286,
"compared": 155,
"hit": 401,
"pun": 641,
"intended": 434,
"neighbor": 534,
"feet": 310,
"thought": 864,
"post": 611,
"arent": 44,
"plus": 602,
"suck": 826,
"sucks": 827,
"knowing": 455,
"understood": 900,
"urle": 912,
"credit": 194,
"finally": 315,
"joke": 443,
"paying": 578,
"hauling": 384,
"companys": 154,
"fly": 320,
"negative": 533,
"reviews": 719,
"flooded": 319,
"roads": 724,
"large": 463,
"wont": 954,
"bunch": 93,
"babies": 65,
"wheel": 944,
"graciously": 363,
"replied": 696,
"raise": 651,
"rates": 655,
"awful": 64,
"says": 744,
"mention": 509,
"shouldnt": 768,
"complex": 163,
"currently": 198,
"encountered": 262,
"knows": 458,
"theyll": 857,
"counting": 188,
"writing": 967,
"ended": 264,
"satisfactory": 739,
"normal": 542,
"report": 697,
"wow": 966,
"understandable": 898,
"charged": 121,
"fees": 309,
"collectors": 140,
"worth": 964,
"standard": 809,
"heavy": 389,
"regardless": 682,
"adding": 17,
"10": 0,
"dollar": 230,
"navigate": 528,
"safely": 735,
"keeping": 446,
"clean": 131,
"gig": 348,
"harbor": 382,
"rare": 654,
"fast": 305,
"connected": 171,
"concerns": 167,
"understanding": 899,
"sympathetic": 838,
"requested": 703,
"requests": 705,
"moms": 519,
"safe": 734,
"edit": 248,
"appreciative": 41,
"traffic": 881,
"second": 748,
"recommended": 677,
"shoutout": 770,
"misses": 516,
"checked": 125,
"eventually": 273,
"old": 555,
"hands": 376,
"everytime": 275,
"experienced": 288,
"reasons": 667,
"brings": 90,
"early": 245,
"hire": 399,
"wife": 946,
"usually": 917,
"add": 16,
"pleased": 600,
"opinion": 561,
"caused": 115,
"receptionist": 674,
"provides": 637,
"finish": 316,
"professionally": 625,
"variety": 920,
"challenges": 117,
"coordinate": 183,
"politely": 606,
"tip": 870,
"leaves": 467,
"sincerely": 778,
"resolve": 709,
"couch": 187,
"effectively": 249,
"updates": 907,
"3rd": 4,
"sort": 797,
"receiving": 670,
"walk": 925,
"lot": 495,
"stayed": 817,
"complaint": 159,
"website": 938,
"automatic": 58,
"surgery": 833,
"sizes": 783,
"instantly": 431,
"sweet": 836,
"paid": 571,
"offers": 553,
"difficulties": 222,
"providing": 638,
"promised": 627,
"efforts": 254,
"cause": 114,
"including": 422,
"random": 653,
"perfect": 582,
"fixed": 318,
"asking": 49,
"fix": 317,
"everyday": 274,
"havent": 385,
"contacted": 178,
"represents": 700,
"residents": 707,
"theres": 856,
"period": 583,
"emails": 257,
"replace": 693,
"conscientious": 172,
"collector": 139,
"gives": 350,
"integrity": 433,
"opening": 560,
"hesitation": 396,
"frustrated": 337,
"auto": 56,
"rushed": 732,
"gold": 355,
"fair": 300,
"spoken": 803,
"card": 106,
"additional": 18,
"walking": 927,
"ownership": 570,
"game": 341,
"refund": 679,
"start": 812,
"file": 313,
"value": 918,
"confused": 169,
"experiences": 289,
"technical": 847,
"double": 233,
"living": 485,
"created": 193,
"passed": 574,
"smooth": 788
}
}
}