Spaces:
Running on Zero
Running on Zero
| import json | |
| import pandas as pd | |
def load_jsonl(path: str) -> list[dict]:
    """Read a JSON Lines file and return its records in file order.

    Args:
        path: Location of the UTF-8 encoded file to read.

    Returns:
        One parsed JSON value per line; lines that are empty or contain
        only whitespace are skipped.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    with open(path, encoding="utf-8") as handle:
        for raw_line in handle:
            # Blank separator lines carry no record; skip them before parsing.
            if not raw_line.strip():
                continue
            records.append(json.loads(raw_line))
    return records
## LLMSQL 1.0
# NOTE(security): unpickling can execute arbitrary code, so these .pkl files
# must only ever come from a trusted source.
tables_llmsql_1 = pd.read_pickle("llmsql_1.0/llmsql_tables.pkl")
# Accessed below as questions_splits_llmsql_1[split_name][question_id];
# presumably a mapping of split name -> {question id -> question payload}
# (the pickle's exact type is not visible here -- confirm against the data).
questions_splits_llmsql_1 = pd.read_pickle("llmsql_1.0/llmsql_formatted.pkl")

# Per-split summary: first and last question id (in numeric order) and the
# number of questions in the split.
split_info_llmsql_1 = {}
# Every question from every split, keyed by its id converted to int.
# If two splits share an id, the split visited later wins.
questions_llmsql_1 = {}
for split_name in questions_splits_llmsql_1.keys():
    split_questions = questions_splits_llmsql_1[split_name]
    # Ids are ordered by their integer value, not lexicographically.
    # An empty split raises IndexError on the [0] / [-1] lookups below.
    sorted_keys = sorted(split_questions.keys(), key=int)
    split_info_llmsql_1[split_name] = {
        "first": sorted_keys[0],
        "last": sorted_keys[-1],
        "count": len(sorted_keys),
    }
    for question_id, items in split_questions.items():
        questions_llmsql_1[int(question_id)] = items

# The same summary is also exposed under the name `split_info`. It is built
# as an independent copy (outer and inner dicts) so that mutating one summary
# never changes the other, instead of re-reading both pickles and recomputing
# everything a second time.
split_info = {name: dict(info) for name, info in split_info_llmsql_1.items()}
## LLMSQL 2.0
# Raw records, one per non-blank line of each JSONL file.
tables_list_llmsql_2 = load_jsonl("llmsql_2.0/tables.jsonl")
questions_list_llmsql_2 = load_jsonl("llmsql_2.0/questions.jsonl")

# Question records indexed by their "question_id" field; when an id repeats,
# the record appearing later in the file replaces the earlier one.
questions_llmsql_2 = {
    record["question_id"]: record for record in questions_list_llmsql_2
}
# Table records indexed by their "table_id" field, with the same
# later-record-wins behavior for repeated ids.
tables_llmsql_2 = {entry["table_id"]: entry for entry in tables_list_llmsql_2}