NOT-OMEGA committed on
Commit
66be360
Β·
verified Β·
1 Parent(s): 29bb2d7

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +13 -84
main.py CHANGED
@@ -23,29 +23,23 @@ class GenerateRequest(BaseModel):
23
  temperature: float = 0.8
24
  top_k: int = 40
25
 
26
- # Tokenizer setup
27
  try:
28
  enc = tiktoken.get_encoding("gpt2")
29
- print("βœ… Tokenizer loaded successfully.")
30
  except Exception as e:
31
- print(f"❌ Warning: tiktoken not found. Error: {e}")
32
  enc = None
33
 
34
-
35
- # βœ… FIX 1: index.html serve karo root pe
36
  @app.get("/")
37
  async def root():
38
  current_dir = os.path.dirname(os.path.abspath(__file__))
39
  return FileResponse(os.path.join(current_dir, "index.html"))
40
 
41
-
42
  @app.get("/health")
43
  async def health_check():
44
  current_dir = os.path.dirname(os.path.abspath(__file__))
45
- # βœ… FIX 2: .exe β†’ no extension (Linux binary)
46
- exe_path = os.path.join(current_dir, "inference")
47
- model_path = os.path.join(current_dir, "model.bin")
48
-
49
  return {
50
  "status": "ok",
51
  "inference_exe_found": os.path.exists(exe_path),
@@ -53,101 +47,55 @@ async def health_check():
53
  "working_directory": current_dir
54
  }
55
 
56
-
57
  @app.post("/generate")
58
  async def generate_text(req: GenerateRequest):
59
-
60
- # 0. Tokenizer check
61
  if enc is None:
62
- raise HTTPException(
63
- status_code=500,
64
- detail="Tokenizer not loaded. Run: pip install tiktoken"
65
- )
66
 
67
- # 1. Encode prompt
68
  input_tokens = enc.encode(req.prompt)
69
  token_str = ",".join(map(str, input_tokens))
70
 
71
- # 2. Path setup
72
  current_dir = os.path.dirname(os.path.abspath(__file__))
73
- # βœ… FIX 3: .exe β†’ no extension (Linux binary)
74
  exe_path = os.path.join(current_dir, "inference")
75
  model_path = os.path.join(current_dir, "model.bin")
76
 
77
- print(f"DEBUG: exe -> {exe_path} exists={os.path.exists(exe_path)}")
78
- print(f"DEBUG: model -> {model_path} exists={os.path.exists(model_path)}")
79
-
80
- # 3. File existence checks
81
  if not os.path.exists(exe_path):
82
- raise HTTPException(
83
- status_code=500,
84
- detail=f"inference binary nahi mili: {exe_path} β€” Dockerfile se compile honi chahiye!"
85
- )
86
 
87
  if not os.path.exists(model_path):
88
- raise HTTPException(
89
- status_code=500,
90
- detail=f"model.bin nahi mili: {model_path} β€” Model file same folder mein rakhni hai!"
91
- )
92
 
93
- # 4. Run C++ engine
94
  try:
95
  start_time = time.perf_counter()
96
-
97
  process = subprocess.run(
98
- [
99
- exe_path,
100
- token_str,
101
- str(req.max_tokens),
102
- str(req.temperature),
103
- str(req.top_k),
104
- ],
105
  capture_output=True,
106
  text=True,
107
  cwd=current_dir
108
  )
109
-
110
  elapsed_ms = (time.perf_counter() - start_time) * 1000
111
-
112
  except Exception as e:
113
  raise HTTPException(status_code=500, detail=f"Execution failed: {str(e)}")
114
 
115
- # 5. Error check
116
  if process.returncode != 0 and not process.stdout.strip():
117
  stdout_msg = process.stdout.strip() if process.stdout else ""
118
  stderr_msg = process.stderr.strip() if process.stderr else ""
 
119
 
120
- if "ERROR_MODEL_NOT_FOUND" in stdout_msg:
121
- raise HTTPException(status_code=500, detail="model.bin nahi mili! Same folder mein rakho.")
122
- elif "ERROR_ARGS" in stdout_msg:
123
- raise HTTPException(status_code=500, detail="C++ engine ko arguments galat mile.")
124
- else:
125
- raise HTTPException(
126
- status_code=500,
127
- detail=f"C++ Error | stdout: '{stdout_msg}' | stderr: '{stderr_msg}'"
128
- )
129
-
130
- # 6. Decode output token IDs
131
  try:
132
  output_str = process.stdout.strip()
133
-
134
- if not output_str:
135
- generated_ids = []
136
- else:
137
- generated_ids = []
138
  for x in output_str.split():
139
  try:
140
  generated_ids.append(int(x))
141
  except ValueError:
142
- print(f"DEBUG: skipping non-integer token: '{x}'")
143
 
144
  generated_text = enc.decode(generated_ids) if generated_ids else ""
145
-
146
  tokens_out = len(generated_ids)
147
  tokens_per_sec = round(tokens_out / (elapsed_ms / 1000), 2) if elapsed_ms > 0 else 0
148
 
149
- print(f"βœ… Generated {tokens_out} tokens in {elapsed_ms:.2f}ms ({tokens_per_sec} tok/s)")
150
-
151
  return {
152
  "prompt": req.prompt,
153
  "generated_text": generated_text,
@@ -156,24 +104,5 @@ async def generate_text(req: GenerateRequest):
156
  "latency_ms": round(elapsed_ms, 2),
157
  "tokens_per_sec": tokens_per_sec
158
  }
159
-
160
  except Exception as e:
161
- raise HTTPException(status_code=500, detail=f"Decoding error: {str(e)}")
162
- ```
163
-
164
- ---
165
-
166
- ## πŸš€ Deployment Steps (Basic se Basic)
167
-
168
- **Step 1** β€” Hugging Face pe jaao β†’ apna `NOT-OMEGA/Inference` Space kholo
169
-
170
- **Step 2** β€” `main.py` file pe click karo β†’ Edit button dabao β†’ Pura purana code delete karo β†’ Upar wala naya code paste karo β†’ **Commit changes**
171
-
172
- **Step 3** β€” Baaki files (Dockerfile, index.html, inference.cpp, requirements.txt) already sahi hain, unhe **mat chhuona**
173
-
174
- **Step 4** β€” Space automatically rebuild hoga (2-5 minutes lagenge)
175
-
176
- **Step 5** β€” **Logs** tab check karo β€” ye lines dikhni chahiye:
177
- ```
178
- βœ… Tokenizer loaded successfully.
179
- INFO: Uvicorn running on http://0.0.0.0:7860
 
23
  temperature: float = 0.8
24
  top_k: int = 40
25
 
 
26
  try:
27
  enc = tiktoken.get_encoding("gpt2")
28
+ print("Tokenizer loaded successfully.")
29
  except Exception as e:
30
+ print(f"Warning: tiktoken not found. Error: {e}")
31
  enc = None
32
 
 
 
33
  @app.get("/")
34
  async def root():
35
  current_dir = os.path.dirname(os.path.abspath(__file__))
36
  return FileResponse(os.path.join(current_dir, "index.html"))
37
 
 
38
  @app.get("/health")
39
  async def health_check():
40
  current_dir = os.path.dirname(os.path.abspath(__file__))
41
+ exe_path = os.path.join(current_dir, "inference")
42
+ model_path = os.path.join(current_dir, "model.bin")
 
 
43
  return {
44
  "status": "ok",
45
  "inference_exe_found": os.path.exists(exe_path),
 
47
  "working_directory": current_dir
48
  }
49
 
 
50
  @app.post("/generate")
51
  async def generate_text(req: GenerateRequest):
 
 
52
  if enc is None:
53
+ raise HTTPException(status_code=500, detail="Tokenizer not loaded.")
 
 
 
54
 
 
55
  input_tokens = enc.encode(req.prompt)
56
  token_str = ",".join(map(str, input_tokens))
57
 
 
58
  current_dir = os.path.dirname(os.path.abspath(__file__))
 
59
  exe_path = os.path.join(current_dir, "inference")
60
  model_path = os.path.join(current_dir, "model.bin")
61
 
 
 
 
 
62
  if not os.path.exists(exe_path):
63
+ raise HTTPException(status_code=500, detail=f"inference binary not found: {exe_path}")
 
 
 
64
 
65
  if not os.path.exists(model_path):
66
+ raise HTTPException(status_code=500, detail=f"model.bin not found: {model_path}")
 
 
 
67
 
 
68
  try:
69
  start_time = time.perf_counter()
 
70
  process = subprocess.run(
71
+ [exe_path, token_str, str(req.max_tokens), str(req.temperature), str(req.top_k)],
 
 
 
 
 
 
72
  capture_output=True,
73
  text=True,
74
  cwd=current_dir
75
  )
 
76
  elapsed_ms = (time.perf_counter() - start_time) * 1000
 
77
  except Exception as e:
78
  raise HTTPException(status_code=500, detail=f"Execution failed: {str(e)}")
79
 
 
80
  if process.returncode != 0 and not process.stdout.strip():
81
  stdout_msg = process.stdout.strip() if process.stdout else ""
82
  stderr_msg = process.stderr.strip() if process.stderr else ""
83
+ raise HTTPException(status_code=500, detail=f"C++ Error | stdout: '{stdout_msg}' | stderr: '{stderr_msg}'")
84
 
 
 
 
 
 
 
 
 
 
 
 
85
  try:
86
  output_str = process.stdout.strip()
87
+ generated_ids = []
88
+ if output_str:
 
 
 
89
  for x in output_str.split():
90
  try:
91
  generated_ids.append(int(x))
92
  except ValueError:
93
+ pass
94
 
95
  generated_text = enc.decode(generated_ids) if generated_ids else ""
 
96
  tokens_out = len(generated_ids)
97
  tokens_per_sec = round(tokens_out / (elapsed_ms / 1000), 2) if elapsed_ms > 0 else 0
98
 
 
 
99
  return {
100
  "prompt": req.prompt,
101
  "generated_text": generated_text,
 
104
  "latency_ms": round(elapsed_ms, 2),
105
  "tokens_per_sec": tokens_per_sec
106
  }
 
107
  except Exception as e:
108
+ raise HTTPException(status_code=500, detail=f"Decoding error: {str(e)}")