rohitkshirsagar19 committed on
Commit
3bf2541
·
verified ·
1 Parent(s): d317597

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +151 -35
main.py CHANGED
@@ -1,33 +1,35 @@
1
  from __future__ import annotations
2
 
 
3
  import logging
 
4
  from typing import Optional
5
 
6
  from fastapi import FastAPI, HTTPException
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from pydantic import BaseModel, Field
 
9
 
10
  logger = logging.getLogger("promptzip")
11
 
12
  # ── App ───────────────────────────────────────────────────────────────────────
13
  app = FastAPI(
14
  title="PromptZip API",
15
- description="Semantic text compression via LLMlingua. Code and log compression run client-side.",
16
- version="0.2.0",
17
  )
18
 
19
- # Permissive CORS β€” required for browser clients calling the HF Space
 
20
  app.add_middleware(
21
  CORSMiddleware,
22
- allow_origins=["*"],
23
- allow_credentials=False, # must be False when allow_origins=["*"]
24
  allow_methods=["*"],
25
  allow_headers=["*"],
26
  )
27
 
28
  # ── Tokenizer (loaded once at startup) ───────────────────────────────────────
29
- import tiktoken
30
-
31
  _encoder = tiktoken.get_encoding("cl100k_base")
32
  _COST_PER_MILLION: float = 5.00 # USD β€” GPT-4o standard input rate
33
 
@@ -38,27 +40,25 @@ def count_tokens(text: str) -> int:
38
 
39
 
40
  def estimate_cost(token_count: int) -> float:
41
- """USD cost at $5.00 / 1 M tokens."""
42
  return round((token_count / 1_000_000) * _COST_PER_MILLION, 6)
43
 
44
 
45
- # ── LLMlingua (lazy-loaded so startup is never blocked) ──────────────────────
46
  _llmlingua_compressor = None
47
  _llmlingua_error: Optional[str] = None
48
 
49
- # Aggression β†’ target retention ratio
50
- _TEXT_RATIOS = {1: 0.8, 2: 0.6, 3: 0.4}
51
-
52
 
53
  def _get_llmlingua():
54
- """Return a cached PromptCompressor, or raise HTTP 503 if unavailable."""
55
  global _llmlingua_compressor, _llmlingua_error
56
  if _llmlingua_compressor is not None:
57
  return _llmlingua_compressor
58
  if _llmlingua_error is not None:
59
  raise HTTPException(
60
  status_code=503,
61
- detail=f"LLMlingua unavailable: {_llmlingua_error}",
 
62
  )
63
  try:
64
  from llmlingua import PromptCompressor
@@ -74,34 +74,146 @@ def _get_llmlingua():
74
  logger.error("LLMlingua init failed: %s", exc)
75
  raise HTTPException(
76
  status_code=503,
77
- detail=f"LLMlingua unavailable: {exc}",
 
78
  )
79
 
80
 
81
- # ── Compression ───────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  def compress_text(text: str, aggression: int) -> str:
84
- """Semantic compression via LLMlingua PromptCompressor."""
 
 
 
85
  compressor = _get_llmlingua()
86
  ratio = _TEXT_RATIOS[aggression]
87
  result = compressor.compress_prompt(
88
  text,
89
  rate=ratio,
90
- force_tokens=["\n"],
91
- #drop_consecutive_whitespace=True, # not supported to current version
92
  )
93
  return result.get("compressed_prompt", text)
94
 
95
 
96
  # ── Schemas ───────────────────────────────────────────────────────────────────
 
 
 
 
 
97
 
98
  class CompressRequest(BaseModel):
99
- text: str = Field(..., description="The raw text to compress semantically.")
 
100
  aggression_level: int = Field(
101
  2,
102
  ge=1,
103
  le=3,
104
- description="1 = gentle (80% retained), 2 = balanced (60%), 3 = aggressive (40%).",
105
  )
106
 
107
 
@@ -110,22 +222,22 @@ class CompressResponse(BaseModel):
110
  original_tokens: int
111
  new_tokens: int
112
  tokens_saved: int
113
- percent_saved: float
114
- dollars_saved: float
 
115
  aggression_level: int
116
 
117
 
118
  # ── Endpoints ─────────────────────────────────────────────────────────────────
119
-
120
  @app.get("/health", tags=["Health"])
121
  async def health_check():
122
- """Liveness probe β€” confirms the API is running."""
123
- return {"status": "ok", "service": "promptzip-api", "version": "0.2.0"}
124
 
125
 
126
  @app.post("/api/tokenize", tags=["Tokenizer"])
127
  async def tokenize(body: dict):
128
- """Count exact tokens for a text payload and return estimated cost."""
129
  text = body.get("text", "")
130
  tokens = count_tokens(text)
131
  return {
@@ -139,15 +251,20 @@ async def tokenize(body: dict):
139
  @app.post("/api/compress", response_model=CompressResponse, tags=["Compress"])
140
  async def compress(body: CompressRequest):
141
  """
142
- Semantically compress **text** using LLMlingua.
143
-
144
- - Code and log compression are handled client-side (regex) in the frontend.
145
- - Only `mode=text` is served here.
146
  """
147
  if not body.text.strip():
148
  raise HTTPException(status_code=400, detail="text must not be empty.")
149
 
150
- compressed = compress_text(body.text, body.aggression_level)
 
 
 
 
 
151
 
152
  original_tokens = count_tokens(body.text)
153
  new_tokens = count_tokens(compressed)
@@ -160,8 +277,7 @@ async def compress(body: CompressRequest):
160
  new_tokens=new_tokens,
161
  tokens_saved=saved,
162
  percent_saved=pct,
163
- dollars_saved=round(
164
- estimate_cost(original_tokens) - estimate_cost(new_tokens), 6
165
- ),
166
  aggression_level=body.aggression_level,
167
  )
 
1
  from __future__ import annotations
2
 
3
+ import re
4
  import logging
5
+ from enum import Enum
6
  from typing import Optional
7
 
8
  from fastapi import FastAPI, HTTPException
9
  from fastapi.middleware.cors import CORSMiddleware
10
  from pydantic import BaseModel, Field
11
+ import tiktoken
12
 
13
  logger = logging.getLogger("promptzip")
14
 
15
  # ── App ───────────────────────────────────────────────────────────────────────
16
  app = FastAPI(
17
  title="PromptZip API",
18
+ description="Compress large text, code, and logs to save LLM context window space.",
19
+ version="0.1.0",
20
  )
21
 
22
+ origins = ["*"]
23
+
24
  app.add_middleware(
25
  CORSMiddleware,
26
+ allow_origins=origins,
27
+ allow_credentials=False,
28
  allow_methods=["*"],
29
  allow_headers=["*"],
30
  )
31
 
32
# ── Tokenizer (loaded once at startup) ───────────────────────────────────────
# cl100k_base is the encoding used by the GPT-4 / GPT-4o model family.
_encoder = tiktoken.get_encoding("cl100k_base")
_COST_PER_MILLION: float = 5.00  # USD — GPT-4o standard input rate
35
 
 
40
 
41
 
42
def estimate_cost(token_count: int) -> float:
    """Return the estimated USD cost of *token_count* input tokens, rounded to 6 dp."""
    millions_of_tokens = token_count / 1_000_000
    return round(millions_of_tokens * _COST_PER_MILLION, 6)
45
 
46
 
47
+ # ── LLMlingua (optional β€” lazy-loaded so startup is never blocked) ────────────
48
  _llmlingua_compressor = None
49
  _llmlingua_error: Optional[str] = None
50
 
 
 
 
51
 
52
  def _get_llmlingua():
53
+ """Return a cached PromptCompressor, or raise HTTPException if unavailable."""
54
  global _llmlingua_compressor, _llmlingua_error
55
  if _llmlingua_compressor is not None:
56
  return _llmlingua_compressor
57
  if _llmlingua_error is not None:
58
  raise HTTPException(
59
  status_code=503,
60
+ detail=f"LLMlingua failed to load: {_llmlingua_error}. "
61
+ "Use mode='code' or mode='logs' for regex-based compression.",
62
  )
63
  try:
64
  from llmlingua import PromptCompressor
 
74
  logger.error("LLMlingua init failed: %s", exc)
75
  raise HTTPException(
76
  status_code=503,
77
+ detail=f"LLMlingua failed to load: {exc}. "
78
+ "Use mode='code' or mode='logs' for regex-based compression.",
79
  )
80
 
81
 
82
+ # ── Compression logic ─────────────────────────────────────────────────────────
83
+
84
+ # Aggression β†’ target retention ratio for LLMlingua
85
+ _TEXT_RATIOS = {1: 0.8, 2: 0.6, 3: 0.4}
86
+
87
+
88
def compress_logs(text: str, aggression: int) -> str:
    """
    Regex-based log compression.

    Steps:
      1. Strip common timestamp patterns (ISO-8601, CLF, syslog, bracketed time).
      2. Mask IPv4 addresses when aggression >= 2.
      3. Collapse consecutive duplicate non-blank lines into a repeat marker.
      4. Squeeze runs of blank lines down to a single blank.
      5. At aggression 3, also drop per-line indentation and remaining blanks.
    """
    # Common timestamp formats, removed along with any trailing whitespace.
    timestamp_res = (
        # [2023-10-12 14:00:00.123] or 2023-10-12T14:00:00Z
        r"\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:[.,]\d+)?(?:Z|[+-]\d{2}:\d{2})?\s*",
        # [12/Oct/2023:14:00:00 +0000]
        r"\[\d{2}/\w+/\d{4}:\d{2}:\d{2}:\d{2} [+-]\d{4}\]\s*",
        # Jan 12 14:00:00
        r"\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}\s*",
        # [14:00:00]
        r"\[\d{2}:\d{2}:\d{2}(?:\.\d+)?\]\s*",
    )
    for ts_re in timestamp_res:
        text = re.sub(ts_re, "", text)

    # Mask IPv4 addresses (with optional :port) at aggression 2+.
    if aggression >= 2:
        text = re.sub(r"\b(?:\d{1,3}\.){3}\d{1,3}(?::\d+)?\b", "<ip>", text)

    # Collapse consecutive duplicate non-blank lines into a "[repeated Nx]" marker.
    collapsed: list[str] = []
    last_seen = None
    run = 0
    for raw in text.splitlines():
        key = raw.strip()
        if key and key == last_seen:
            run += 1
            marker = f" [repeated {run + 1}x ↑]"
            if run == 1:
                collapsed.append(marker)  # first repeat: add the marker line
            else:
                collapsed[-1] = marker  # later repeats: bump the count in place
        else:
            run = 0
            collapsed.append(raw)
        last_seen = key
    text = "\n".join(collapsed)

    # Squeeze runs of blank lines to a single blank line.
    text = re.sub(r"\n{3,}", "\n\n", text)

    # Maximum aggression: strip per-line whitespace, then drop blank lines entirely.
    if aggression == 3:
        text = "\n".join(line.strip() for line in text.splitlines())
        text = re.sub(r"\n{2,}", "\n", text)

    return text.strip()
147
+
148
+
149
def compress_code(text: str, aggression: int) -> str:
    """
    Regex-based code comment & whitespace stripping:
      - Remove /* ... */ block comments (including docblock variants /** */)
      - Remove Python/Ruby # single-line comments (shebang '#!' lines preserved)
      - Remove C++/JS // single-line comments
      - Remove Python/Java triple-quoted docstrings (aggression >= 2)
      - Remove blank / whitespace-only lines (aggression >= 2)
      - Strip trailing whitespace and over-indent (aggression 3)

    Best-effort only: comment markers inside string literals are stripped too.
    """
    # --- Block comments: /* ... */ (non-greedy, dotall) ---
    text = re.sub(r"/\*.*?\*/", "", text, flags=re.DOTALL)

    # --- Triple-quoted Python docstrings (aggression >= 2) ---
    if aggression >= 2:
        text = re.sub(r'""".*?"""', "", text, flags=re.DOTALL)
        text = re.sub(r"'''.*?'''", "", text, flags=re.DOTALL)

    # --- Single-line comments ---
    # // comments; the (?<!:) lookbehind spares URLs such as http://example.com
    text = re.sub(r"(?m)(?<!:)//.*$", "", text)
    # '#' comments. BUGFIX: the previous guard (?<!^#!) never matched before a
    # line-leading '#' (there are no two preceding characters), so shebang
    # lines were stripped along with ordinary comments. The negative
    # lookahead below keeps any '#!' sequence (shebangs) intact.
    text = re.sub(r"(?m)#(?!!).*$", "", text)

    # --- Trailing whitespace ---
    text = re.sub(r"(?m)[ \t]+$", "", text)

    # --- Blank lines (aggression >= 2) ---
    if aggression >= 2:
        text = re.sub(r"\n{2,}", "\n", text)

    # --- Aggressive: remove all indentation ---
    if aggression == 3:
        text = re.sub(r"(?m)^[ \t]+", "", text)

    return text.strip()
185
+
186
 
187
def compress_text(text: str, aggression: int) -> str:
    """
    Semantic compression via LLMlingua PromptCompressor.
    Falls back gracefully if the model cannot be loaded.
    """
    outcome = _get_llmlingua().compress_prompt(
        text,
        rate=_TEXT_RATIOS[aggression],
        force_tokens=["\n"],  # preserve newline structure
    )
    return outcome.get("compressed_prompt", text)
200
 
201
 
202
  # ── Schemas ───────────────────────────────────────────────────────────────────
203
class Mode(str, Enum):
    """Compression strategy selector for /api/compress."""

    text = "text"
    code = "code"
    logs = "logs"
207
+
208
 
209
class CompressRequest(BaseModel):
    """Request payload for /api/compress."""

    # The raw text, code, or log to compress.
    text: str = Field(..., description="The raw text, code, or log to compress.")
    # Which strategy to apply; defaults to semantic text compression.
    mode: Mode = Field(Mode.text, description="Compression strategy: text | code | logs.")
    # Validated to the 1..3 range; each level keeps progressively fewer tokens.
    aggression_level: int = Field(
        2,
        ge=1,
        le=3,
        description="1 = gentle, 2 = balanced, 3 = aggressive.",
    )
218
 
219
 
 
222
  original_tokens: int
223
  new_tokens: int
224
  tokens_saved: int
225
+ percent_saved: float = Field(..., description="Percentage of tokens removed.")
226
+ dollars_saved: float = Field(..., description="Estimated API cost delta in USD.")
227
+ mode: Mode
228
  aggression_level: int
229
 
230
 
231
  # ── Endpoints ─────────────────────────────────────────────────────────────────
 
232
@app.get("/health", tags=["Health"])
async def health_check():
    """Liveness probe: confirm the API process is up and serving requests."""
    return {"status": "ok", "service": "promptzip-api"}
236
 
237
 
238
  @app.post("/api/tokenize", tags=["Tokenizer"])
239
  async def tokenize(body: dict):
240
+ """Count exact tokens and return estimated cost."""
241
  text = body.get("text", "")
242
  tokens = count_tokens(text)
243
  return {
 
251
  @app.post("/api/compress", response_model=CompressResponse, tags=["Compress"])
252
  async def compress(body: CompressRequest):
253
  """
254
+ Compress *text* using the chosen strategy:
255
+ - **logs** β€” regex strips timestamps, IPs, and repeating lines
256
+ - **code** β€” regex strips comments, docstrings, blank lines
257
+ - **text** β€” semantic compression via LLMlingua PromptCompressor
258
  """
259
  if not body.text.strip():
260
  raise HTTPException(status_code=400, detail="text must not be empty.")
261
 
262
+ dispatch = {
263
+ Mode.logs: compress_logs,
264
+ Mode.code: compress_code,
265
+ Mode.text: compress_text,
266
+ }
267
+ compressed = dispatch[body.mode](body.text, body.aggression_level)
268
 
269
  original_tokens = count_tokens(body.text)
270
  new_tokens = count_tokens(compressed)
 
277
  new_tokens=new_tokens,
278
  tokens_saved=saved,
279
  percent_saved=pct,
280
+ dollars_saved=round(estimate_cost(original_tokens) - estimate_cost(new_tokens), 6),
281
+ mode=body.mode,
 
282
  aggression_level=body.aggression_level,
283
  )