TobDeBer committed on
Commit
fd54d78
·
1 Parent(s): 56bfe89

add cpuinfo

Browse files
Files changed (2) hide show
  1. app.py +33 -7
  2. requirements.txt +21 -1
app.py CHANGED
@@ -5,6 +5,8 @@ import time
5
  from threading import Thread
6
  import sys
7
  import os
 
 
8
 
9
  # Model configuration
10
  if len(sys.argv) > 1 and os.path.exists(sys.argv[1]):
@@ -17,24 +19,48 @@ else:
17
  tokenizer = None
18
  model = None
19
 
 
 
 
 
20
  def load_model():
21
- """Load the Smol LLM model and tokenizer"""
22
  global tokenizer, model
23
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  print(f"Loading model: {MODEL_NAME}")
 
 
25
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
26
- tokenizer.padding_side = "left" # Ensure consistency
27
  if tokenizer.pad_token is None:
28
  tokenizer.pad_token = tokenizer.eos_token
 
29
  model = AutoModelForCausalLM.from_pretrained(
30
  MODEL_NAME,
 
31
  dtype=torch.float32,
32
- device_map={"": "cpu"}
 
33
  )
34
-
35
- if tokenizer.pad_token is None:
36
- tokenizer.pad_token = tokenizer.eos_token
37
-
38
  return "✅ Model loaded successfully!"
39
  except Exception as e:
40
  return f"❌ Error loading model: {str(e)}"
 
5
  from threading import Thread
6
  import sys
7
  import os
8
+ os.environ["BNB_CUDA_VERSION"] = "0" # Forces bitsandbytes to recognize no GPU
9
+ os.environ["OMP_NUM_THREADS"] = "1" # Prevents race conditions in custom CPU kernels
10
 
11
  # Model configuration
12
  if len(sys.argv) > 1 and os.path.exists(sys.argv[1]):
 
19
  tokenizer = None
20
  model = None
21
 
22
+ import platform
23
+ import subprocess
24
+ import cpuinfo # Optional: 'pip install py-cpuinfo' is better if you can add it
25
+
26
  def load_model():
27
+ """Load the Smol LLM model and tokenizer with hardware detection"""
28
  global tokenizer, model
29
  try:
30
+ print("--- Hardware Audit ---")
31
+ print(f"Processor: {platform.processor()}")
32
+ print(f"Machine: {platform.machine()}")
33
+
34
+ # Check for CPU Flags (Instruction Sets)
35
+ try:
36
+ # For Linux-based Cloud environments
37
+ cpu_flags = subprocess.check_output("lscpu", shell=True).decode()
38
+ print("Instruction sets found:")
39
+ for flag in ["avx512", "avx2", "avx", "fma", "amx"]:
40
+ if flag in cpu_flags.lower():
41
+ print(f" ✅ {flag.upper()} supported")
42
+ else:
43
+ print(f" ❌ {flag.upper()} NOT found")
44
+ except Exception as e:
45
+ print(f"Could not check CPU flags: {e}")
46
+
47
+ print(f"PyTorch version: {torch.__version__}")
48
  print(f"Loading model: {MODEL_NAME}")
49
+ print("----------------------")
50
+
51
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
52
+ tokenizer.padding_side = "left"
53
  if tokenizer.pad_token is None:
54
  tokenizer.pad_token = tokenizer.eos_token
55
+
56
  model = AutoModelForCausalLM.from_pretrained(
57
  MODEL_NAME,
58
+ # If the cloud lacks AVX512, float32 is safer but slower
59
  dtype=torch.float32,
60
+ device_map={"": "cpu"},
61
+ low_cpu_mem_usage=True
62
  )
63
+
 
 
 
64
  return "✅ Model loaded successfully!"
65
  except Exception as e:
66
  return f"❌ Error loading model: {str(e)}"
requirements.txt CHANGED
@@ -1,17 +1,28 @@
1
  accelerate==1.12.0
2
  aiofiles==23.2.1
 
 
 
3
  annotated-doc==0.0.4
4
  annotated-types==0.7.0
5
  anyio==4.12.0
 
 
 
6
  bitsandbytes==0.49.0
7
  brotli==1.2.0
8
  certifi==2025.11.12
 
9
  charset-normalizer==3.4.4
10
  click==8.3.1
 
 
 
11
  exceptiongroup==1.3.1
12
  fastapi==0.125.0
13
  ffmpy==1.0.0
14
  filelock==3.20.1
 
15
  fsspec==2025.10.0
16
  gradio==5.9.0
17
  gradio_client==1.5.2
@@ -27,6 +38,8 @@ markdown-it-py==4.0.0
27
  MarkupSafe==2.1.5
28
  mdurl==0.1.2
29
  mpmath==1.3.0
 
 
30
  networkx==3.4.2
31
  numpy==2.2.6
32
  nvidia-cublas-cu12==12.8.4.1
@@ -48,7 +61,11 @@ orjson==3.11.5
48
  packaging==25.0
49
  pandas==2.3.3
50
  pillow==11.3.0
 
51
  psutil==5.9.8
 
 
 
52
  pydantic==2.12.4
53
  pydantic_core==2.41.5
54
  pydub==0.25.1
@@ -66,13 +83,14 @@ safetensors==0.7.0
66
  semantic-version==2.10.0
67
  shellingham==1.5.4
68
  six==1.17.0
 
69
  starlette==0.50.0
70
  sympy==1.14.0
71
  tokenizers==0.22.1
72
  tomlkit==0.13.3
73
  torch==2.9.1
74
  tqdm==4.67.1
75
- transformers @ git+https://github.com/708-145/transformers.git@hirma
76
  triton==3.5.1
77
  typer==0.20.0
78
  typer-slim==0.20.0
@@ -82,3 +100,5 @@ tzdata==2025.3
82
  urllib3==2.6.2
83
  uvicorn==0.38.0
84
  websockets==14.2
 
 
 
1
  accelerate==1.12.0
2
  aiofiles==23.2.1
3
+ aiohappyeyeballs==2.6.1
4
+ aiohttp==3.13.2
5
+ aiosignal==1.4.0
6
  annotated-doc==0.0.4
7
  annotated-types==0.7.0
8
  anyio==4.12.0
9
+ async-timeout==5.0.1
10
+ attrs==25.4.0
11
+ Authlib==1.6.6
12
  bitsandbytes==0.49.0
13
  brotli==1.2.0
14
  certifi==2025.11.12
15
+ cffi==2.0.0
16
  charset-normalizer==3.4.4
17
  click==8.3.1
18
+ cryptography==46.0.3
19
+ datasets==4.4.2
20
+ dill==0.4.0
21
  exceptiongroup==1.3.1
22
  fastapi==0.125.0
23
  ffmpy==1.0.0
24
  filelock==3.20.1
25
+ frozenlist==1.8.0
26
  fsspec==2025.10.0
27
  gradio==5.9.0
28
  gradio_client==1.5.2
 
38
  MarkupSafe==2.1.5
39
  mdurl==0.1.2
40
  mpmath==1.3.0
41
+ multidict==6.7.0
42
+ multiprocess==0.70.18
43
  networkx==3.4.2
44
  numpy==2.2.6
45
  nvidia-cublas-cu12==12.8.4.1
 
61
  packaging==25.0
62
  pandas==2.3.3
63
  pillow==11.3.0
64
+ propcache==0.4.1
65
  psutil==5.9.8
66
+ py-cpuinfo==9.0.0
67
+ pyarrow==22.0.0
68
+ pycparser==2.23
69
  pydantic==2.12.4
70
  pydantic_core==2.41.5
71
  pydub==0.25.1
 
83
  semantic-version==2.10.0
84
  shellingham==1.5.4
85
  six==1.17.0
86
+ spaces==0.45.0
87
  starlette==0.50.0
88
  sympy==1.14.0
89
  tokenizers==0.22.1
90
  tomlkit==0.13.3
91
  torch==2.9.1
92
  tqdm==4.67.1
93
+ transformers @ git+https://github.com/708-145/transformers.git@4aacbc4f98e1c802cfeb7602a95c594321f1d360
94
  triton==3.5.1
95
  typer==0.20.0
96
  typer-slim==0.20.0
 
100
  urllib3==2.6.2
101
  uvicorn==0.38.0
102
  websockets==14.2
103
+ xxhash==3.6.0
104
+ yarl==1.22.0