eesfeg committed on
Commit
75e6b29
·
1 Parent(s): 4325282
app.py CHANGED
@@ -24,7 +24,7 @@ warnings.filterwarnings("ignore")
24
  os.environ["PYTHONWARNINGS"] = "ignore"
25
 
26
  # =================== MODEL LOADING ===================
27
-
28
  def load_model():
29
  """Load the TinyLlama model"""
30
  print("🚀 Loading Mistral_Test model...")
@@ -34,8 +34,9 @@ def load_model():
34
  model = AutoModelForCausalLM.from_pretrained(
35
  MODEL_ID,
36
  torch_dtype=torch.float32,
37
- device_map="auto",
38
- low_cpu_mem_usage=True
 
39
  )
40
 
41
  print("✅ Model loaded successfully!")
 
24
  os.environ["PYTHONWARNINGS"] = "ignore"
25
 
26
  # =================== MODEL LOADING ===================
27
+ # FIXME: `gr.cache_resource` does not exist in the Gradio API — this line raises
+ # AttributeError at import time. `cache_resource` is Streamlit's (`st.cache_resource`).
+ # In a Gradio app, decorate with functools.lru_cache(maxsize=1) or simply call
+ # load_model() once at module scope instead.
+ @gr.cache_resource
28
  def load_model():
29
  """Load the TinyLlama model"""
30
  print("🚀 Loading Mistral_Test model...")
 
34
  model = AutoModelForCausalLM.from_pretrained(
35
  MODEL_ID,
36
  torch_dtype=torch.float32,
37
+ device_map="cpu",
38
+ low_cpu_mem_usage=True,
39
+ offload_folder="offload"
40
  )
41
 
42
  print("✅ Model loaded successfully!")
imdb_dataset/dataset_dict.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"splits": ["train", "test", "unsupervised"]}
imdb_dataset/test/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c7203aaae5b948ac341d611e27d8ca54b7812d34141bf2edba8d3753c89e7dd
3
+ size 32656552
imdb_dataset/test/dataset_info.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "parquet",
3
+ "citation": "",
4
+ "config_name": "plain_text",
5
+ "dataset_name": "imdb",
6
+ "dataset_size": 133202802,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/train-00000-of-00001.parquet": {
10
+ "num_bytes": 20979968,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/test-00000-of-00001.parquet": {
14
+ "num_bytes": 20470363,
15
+ "checksum": null
16
+ },
17
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/unsupervised-00000-of-00001.parquet": {
18
+ "num_bytes": 41996509,
19
+ "checksum": null
20
+ }
21
+ },
22
+ "download_size": 83446840,
23
+ "features": {
24
+ "text": {
25
+ "dtype": "string",
26
+ "_type": "Value"
27
+ },
28
+ "label": {
29
+ "names": [
30
+ "neg",
31
+ "pos"
32
+ ],
33
+ "_type": "ClassLabel"
34
+ }
35
+ },
36
+ "homepage": "",
37
+ "license": "",
38
+ "size_in_bytes": 216649642,
39
+ "splits": {
40
+ "train": {
41
+ "name": "train",
42
+ "num_bytes": 33435948,
43
+ "num_examples": 25000,
44
+ "dataset_name": "imdb"
45
+ },
46
+ "test": {
47
+ "name": "test",
48
+ "num_bytes": 32653810,
49
+ "num_examples": 25000,
50
+ "dataset_name": "imdb"
51
+ },
52
+ "unsupervised": {
53
+ "name": "unsupervised",
54
+ "num_bytes": 67113044,
55
+ "num_examples": 50000,
56
+ "dataset_name": "imdb"
57
+ }
58
+ },
59
+ "version": {
60
+ "version_str": "0.0.0",
61
+ "major": 0,
62
+ "minor": 0,
63
+ "patch": 0
64
+ }
65
+ }
imdb_dataset/test/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "0c4517be449a88ae",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "test"
13
+ }
imdb_dataset/train/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f135c5a8f758597f5f96f36d9e7676a612f0aac53ae8411bfb799253eefbc7c
3
+ size 33438688
imdb_dataset/train/dataset_info.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "parquet",
3
+ "citation": "",
4
+ "config_name": "plain_text",
5
+ "dataset_name": "imdb",
6
+ "dataset_size": 133202802,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/train-00000-of-00001.parquet": {
10
+ "num_bytes": 20979968,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/test-00000-of-00001.parquet": {
14
+ "num_bytes": 20470363,
15
+ "checksum": null
16
+ },
17
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/unsupervised-00000-of-00001.parquet": {
18
+ "num_bytes": 41996509,
19
+ "checksum": null
20
+ }
21
+ },
22
+ "download_size": 83446840,
23
+ "features": {
24
+ "text": {
25
+ "dtype": "string",
26
+ "_type": "Value"
27
+ },
28
+ "label": {
29
+ "names": [
30
+ "neg",
31
+ "pos"
32
+ ],
33
+ "_type": "ClassLabel"
34
+ }
35
+ },
36
+ "homepage": "",
37
+ "license": "",
38
+ "size_in_bytes": 216649642,
39
+ "splits": {
40
+ "train": {
41
+ "name": "train",
42
+ "num_bytes": 33435948,
43
+ "num_examples": 25000,
44
+ "dataset_name": "imdb"
45
+ },
46
+ "test": {
47
+ "name": "test",
48
+ "num_bytes": 32653810,
49
+ "num_examples": 25000,
50
+ "dataset_name": "imdb"
51
+ },
52
+ "unsupervised": {
53
+ "name": "unsupervised",
54
+ "num_bytes": 67113044,
55
+ "num_examples": 50000,
56
+ "dataset_name": "imdb"
57
+ }
58
+ },
59
+ "version": {
60
+ "version_str": "0.0.0",
61
+ "major": 0,
62
+ "minor": 0,
63
+ "patch": 0
64
+ }
65
+ }
imdb_dataset/train/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "2819011aee707696",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "train"
13
+ }
imdb_dataset/unsupervised/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3700e4aea3020b97dabf57c511456492423935cf7df0b66eefd986f343c73a9b
3
+ size 67118128
imdb_dataset/unsupervised/dataset_info.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "parquet",
3
+ "citation": "",
4
+ "config_name": "plain_text",
5
+ "dataset_name": "imdb",
6
+ "dataset_size": 133202802,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/train-00000-of-00001.parquet": {
10
+ "num_bytes": 20979968,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/test-00000-of-00001.parquet": {
14
+ "num_bytes": 20470363,
15
+ "checksum": null
16
+ },
17
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/unsupervised-00000-of-00001.parquet": {
18
+ "num_bytes": 41996509,
19
+ "checksum": null
20
+ }
21
+ },
22
+ "download_size": 83446840,
23
+ "features": {
24
+ "text": {
25
+ "dtype": "string",
26
+ "_type": "Value"
27
+ },
28
+ "label": {
29
+ "names": [
30
+ "neg",
31
+ "pos"
32
+ ],
33
+ "_type": "ClassLabel"
34
+ }
35
+ },
36
+ "homepage": "",
37
+ "license": "",
38
+ "size_in_bytes": 216649642,
39
+ "splits": {
40
+ "train": {
41
+ "name": "train",
42
+ "num_bytes": 33435948,
43
+ "num_examples": 25000,
44
+ "dataset_name": "imdb"
45
+ },
46
+ "test": {
47
+ "name": "test",
48
+ "num_bytes": 32653810,
49
+ "num_examples": 25000,
50
+ "dataset_name": "imdb"
51
+ },
52
+ "unsupervised": {
53
+ "name": "unsupervised",
54
+ "num_bytes": 67113044,
55
+ "num_examples": 50000,
56
+ "dataset_name": "imdb"
57
+ }
58
+ },
59
+ "version": {
60
+ "version_str": "0.0.0",
61
+ "major": 0,
62
+ "minor": 0,
63
+ "patch": 0
64
+ }
65
+ }
imdb_dataset/unsupervised/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "df8b13b1c356ff29",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "unsupervised"
13
+ }
load.py CHANGED
@@ -1,6 +1,11 @@
1
  from datasets import load_dataset
2
 
3
- # Load a dataset
4
- dataset = load_dataset("imdb") # Example: IMDB reviews
5
- # Or your custom dataset
6
- # dataset = load_dataset("json", data_files="your_data.json")
 
 
 
 
 
 
1
  from datasets import load_dataset
2
 
3
+ # Load dataset
4
+ dataset = load_dataset("imdb")
5
+
6
+ # Save to disk
7
+ dataset.save_to_disk("./imdb_dataset")
8
+
9
+ # Later: reload
10
+ # from datasets import load_from_disk
11
+ # dataset = load_from_disk("./imdb_dataset")