Spaces:

rohitkshirsagar19
/

TokenZip-api

Sleeping

App Files Files Community

rohitkshirsagar19 committed on Feb 23

Commit

3bf2541

verified ·

1 Parent(s): d317597

Update main.py

Browse files

Files changed (1) hide show

main.py +151 -35

main.py CHANGED Viewed

@@ -1,33 +1,35 @@
 from __future__ import annotations
 import logging
 from typing import Optional
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
 logger = logging.getLogger("promptzip")
 # ── App ───────────────────────────────────────────────────────────────────────
 app = FastAPI(
     title="PromptZip API",
-    description="Semantic text compression via LLMlingua. Code and log compression run client-side.",
-    version="0.2.0",
 )
-# Permissive CORS — required for browser clients calling the HF Space
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=False,   # must be False when allow_origins=["*"]
     allow_methods=["*"],
     allow_headers=["*"],
 )
 # ── Tokenizer (loaded once at startup) ───────────────────────────────────────
-import tiktoken
 _encoder = tiktoken.get_encoding("cl100k_base")
 _COST_PER_MILLION: float = 5.00  # USD — GPT-4o standard input rate
@@ -38,27 +40,25 @@ def count_tokens(text: str) -> int:
 def estimate_cost(token_count: int) -> float:
-    """USD cost at $5.00 / 1 M tokens."""
     return round((token_count / 1_000_000) * _COST_PER_MILLION, 6)
-# ── LLMlingua (lazy-loaded so startup is never blocked) ──────────────────────
 _llmlingua_compressor = None
 _llmlingua_error: Optional[str] = None
-# Aggression → target retention ratio
-_TEXT_RATIOS = {1: 0.8, 2: 0.6, 3: 0.4}
 def _get_llmlingua():
-    """Return a cached PromptCompressor, or raise HTTP 503 if unavailable."""
     global _llmlingua_compressor, _llmlingua_error
     if _llmlingua_compressor is not None:
         return _llmlingua_compressor
     if _llmlingua_error is not None:
         raise HTTPException(
             status_code=503,
-            detail=f"LLMlingua unavailable: {_llmlingua_error}",
         )
     try:
         from llmlingua import PromptCompressor
@@ -74,34 +74,146 @@ def _get_llmlingua():
         logger.error("LLMlingua init failed: %s", exc)
         raise HTTPException(
             status_code=503,
-            detail=f"LLMlingua unavailable: {exc}",
         )
-# ── Compression ───────────────────────────────────────────────────────────────
 def compress_text(text: str, aggression: int) -> str:
-    """Semantic compression via LLMlingua PromptCompressor."""
     compressor = _get_llmlingua()
     ratio = _TEXT_RATIOS[aggression]
     result = compressor.compress_prompt(
         text,
         rate=ratio,
-        force_tokens=["\n"],
-        #drop_consecutive_whitespace=True, # not supported to current version
     )
     return result.get("compressed_prompt", text)
 # ── Schemas ───────────────────────────────────────────────────────────────────
 class CompressRequest(BaseModel):
-    text: str = Field(..., description="The raw text to compress semantically.")
     aggression_level: int = Field(
         2,
         ge=1,
         le=3,
-        description="1 = gentle (80% retained), 2 = balanced (60%), 3 = aggressive (40%).",
     )
@@ -110,22 +222,22 @@ class CompressResponse(BaseModel):
     original_tokens: int
     new_tokens: int
     tokens_saved: int
-    percent_saved: float
-    dollars_saved: float
     aggression_level: int
 # ── Endpoints ─────────────────────────────────────────────────────────────────
 @app.get("/health", tags=["Health"])
 async def health_check():
-    """Liveness probe — confirms the API is running."""
-    return {"status": "ok", "service": "promptzip-api", "version": "0.2.0"}
 @app.post("/api/tokenize", tags=["Tokenizer"])
 async def tokenize(body: dict):
-    """Count exact tokens for a text payload and return estimated cost."""
     text = body.get("text", "")
     tokens = count_tokens(text)
     return {
@@ -139,15 +251,20 @@ async def tokenize(body: dict):
 @app.post("/api/compress", response_model=CompressResponse, tags=["Compress"])
 async def compress(body: CompressRequest):
     """
-    Semantically compress **text** using LLMlingua.
-    - Code and log compression are handled client-side (regex) in the frontend.
-    - Only `mode=text` is served here.
     """
     if not body.text.strip():
         raise HTTPException(status_code=400, detail="text must not be empty.")
-    compressed = compress_text(body.text, body.aggression_level)
     original_tokens = count_tokens(body.text)
     new_tokens = count_tokens(compressed)
@@ -160,8 +277,7 @@ async def compress(body: CompressRequest):
         new_tokens=new_tokens,
         tokens_saved=saved,
         percent_saved=pct,
-        dollars_saved=round(
-            estimate_cost(original_tokens) - estimate_cost(new_tokens), 6
-        ),
         aggression_level=body.aggression_level,
     )

 from __future__ import annotations
+import re
 import logging
+from enum import Enum
 from typing import Optional
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
+import tiktoken
 logger = logging.getLogger("promptzip")
 # ── App ───────────────────────────────────────────────────────────────────────
 app = FastAPI(
     title="PromptZip API",
+    description="Compress large text, code, and logs to save LLM context window space.",
+    version="0.1.0",
 )
+origins = ["*"]
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=False,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 # ── Tokenizer (loaded once at startup) ───────────────────────────────────────
 _encoder = tiktoken.get_encoding("cl100k_base")
 _COST_PER_MILLION: float = 5.00  # USD — GPT-4o standard input rate
 def estimate_cost(token_count: int) -> float:
     """Return the USD cost of *token_count* tokens, rounded to 6 decimal places.

     The price per million tokens is taken from the module-level
     ``_COST_PER_MILLION`` constant.
     """
     millions_of_tokens = token_count / 1_000_000
     usd = millions_of_tokens * _COST_PER_MILLION
     return round(usd, 6)
+# ── LLMlingua (optional — lazy-loaded so startup is never blocked) ────────────
 _llmlingua_compressor = None
 _llmlingua_error: Optional[str] = None
 def _get_llmlingua():
+    """Return a cached PromptCompressor, or raise HTTPException if unavailable."""
     global _llmlingua_compressor, _llmlingua_error
     if _llmlingua_compressor is not None:
         return _llmlingua_compressor
     if _llmlingua_error is not None:
         raise HTTPException(
             status_code=503,
+            detail=f"LLMlingua failed to load: {_llmlingua_error}. "
+                   "Use mode='code' or mode='logs' for regex-based compression.",
         )
     try:
         from llmlingua import PromptCompressor
         logger.error("LLMlingua init failed: %s", exc)
         raise HTTPException(
             status_code=503,
+            detail=f"LLMlingua failed to load: {exc}. "
+                   "Use mode='code' or mode='logs' for regex-based compression.",
         )
+# ── Compression logic ─────────────────────────────────────────────────────────
+# Aggression → target retention ratio for LLMlingua
+_TEXT_RATIOS = {1: 0.8, 2: 0.6, 3: 0.4}
def compress_logs(text: str, aggression: int) -> str:
    """
    Regex-based log compression.

    Steps, applied in this order:
      1. Remove common timestamp formats (ISO-8601 with or without enclosing
         brackets, Apache/CLF, syslog, bracketed time-of-day).
      2. Replace IPv4 addresses (with an optional ``:port``) by the
         placeholder ``<ip>`` when ``aggression >= 2``.
      3. Collapse consecutive duplicate lines into the first occurrence
         followed by a single ``[repeated Nx ↑]`` marker line.
      4. Collapse runs of blank (or whitespace-only) lines to one blank line.
      5. When ``aggression == 3``, strip every line and drop blank lines.

    Args:
        text: Raw log text.
        aggression: Compression level; 1 keeps the most, 3 the least.

    Returns:
        The compressed log with leading/trailing whitespace removed.
    """
    # --- Timestamps ---
    # Only horizontal whitespace is consumed after a timestamp: using ``\s*``
    # would also swallow the newline and glue a line that ends in a timestamp
    # onto the following line.
    iso_core = (
        r"\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}"
        r"(?:[.,]\d+)?(?:Z|[+-]\d{2}:?\d{2})?"
    )
    timestamp_patterns = (
        # [2023-10-12 14:00:00.123] or 2023-10-12T14:00:00Z / ...+0000.
        # The bracketed form is tried first so no empty "[]" is left behind.
        r"(?:\[" + iso_core + r"\]|" + iso_core + r")[ \t]*",
        # [12/Oct/2023:14:00:00 +0000]
        r"\[\d{2}/\w+/\d{4}:\d{2}:\d{2}:\d{2} [+-]\d{4}\][ \t]*",
        # Jan 12 14:00:00
        r"\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"
        r"[ \t]+\d{1,2}[ \t]+\d{2}:\d{2}:\d{2}[ \t]*",
        # [14:00:00]
        r"\[\d{2}:\d{2}:\d{2}(?:\.\d+)?\][ \t]*",
    )
    for pattern in timestamp_patterns:
        text = re.sub(pattern, "", text)

    # --- IP addresses (aggression >= 2) ---
    if aggression >= 2:
        text = re.sub(
            r"\b(?:\d{1,3}\.){3}\d{1,3}(?::\d+)?\b",
            "<ip>",
            text,
        )

    # --- Collapse consecutive duplicate lines ---
    # Lines are compared after stripping; blank lines are never deduplicated
    # here (they are handled by the blank-line collapse below).
    deduped: list[str] = []
    previous = None
    run_length = 1
    for line in text.splitlines():
        key = line.strip()
        if key and key == previous:
            run_length += 1
            marker = f"  [repeated {run_length}x ↑]"
            if run_length == 2:
                deduped.append(marker)
            else:
                # Update the marker already emitted for this run.
                deduped[-1] = marker
            continue
        previous = key
        run_length = 1
        deduped.append(line)

    # --- Collapse blank lines ---
    # Whitespace-only lines (e.g. what remains of a timestamp-only line)
    # count as blank.
    text = "\n".join(deduped)
    text = re.sub(r"\n(?:[ \t]*\n){2,}", "\n\n", text)

    # --- Strip per-line whitespace and drop blank lines (aggression 3) ---
    if aggression == 3:
        stripped_lines = (line.strip() for line in text.splitlines())
        text = "\n".join(line for line in stripped_lines if line)

    return text.strip()
def compress_code(text: str, aggression: int) -> str:
    """
    Regex-based code comment & whitespace stripping (best-effort, not a parser).

    The input is scanned once, left to right, recognising string literals and
    comments together so that comment markers inside quoted strings
    (``"#fff"``, ``"http://a//b"``) are left alone:
      - Remove /* ... */ block comments (including docblock variants /** */)
      - Remove ``#`` single-line comments, except a ``#!`` shebang at the very
        start of the text and C preprocessor lines such as ``#include``
      - Remove ``//`` single-line comments unless preceded by ``:`` (URLs)
      - Remove triple-quoted strings / docstrings (aggression >= 2)
      - Strip trailing whitespace (always)
      - Remove blank lines (aggression >= 2)
      - Remove all leading indentation (aggression 3)

    Known limitations: the language is not detected, so Python's ``//``
    floor-division operator is treated as a comment start, and removing
    indentation at aggression 3 changes the meaning of indentation-sensitive
    code.

    Args:
        text: Source code to compress.
        aggression: Compression level; 1 keeps the most, 3 the least.

    Returns:
        The compressed code with leading/trailing whitespace removed.
    """
    # Preprocessor keywords that must survive when written directly after '#'.
    directives = (
        "include|define|undef|ifdef|ifndef|if|elif|else|endif|pragma|error"
    )
    # Order matters: triple quotes must be tried before ordinary strings, and
    # strings before comments, so the earliest construct on a line wins.
    token_groups = (
        ("triple", r'"""[\s\S]*?"""' + "|" + r"'''[\s\S]*?'''"),
        ("string", r'"(?:\\.|[^"\\\n])*"' + "|" + r"'(?:\\.|[^'\\\n])*'"),
        ("block", r"/\*[\s\S]*?\*/"),
        ("line", r"(?<!:)//[^\n]*|#(?!(?:" + directives + r")\b)[^\n]*"),
    )
    scanner = re.compile(
        "|".join(f"(?P<{name}>{body})" for name, body in token_groups)
    )

    def _rewrite(match):
        """Return the replacement for one scanned token ('' deletes it)."""
        kind = match.lastgroup
        token = match.group()
        if kind == "string":
            return token
        if kind == "triple":
            return "" if aggression >= 2 else token
        if kind == "line" and match.start() == 0 and token.startswith("#!"):
            # Shebang on the first line is not a comment.
            return token
        return ""

    text = scanner.sub(_rewrite, text)

    # --- Trailing whitespace ---
    text = re.sub(r"(?m)[ \t]+$", "", text)

    # --- Blank lines (aggression >= 2) ---
    if aggression >= 2:
        text = re.sub(r"\n{2,}", "\n", text)

    # --- Aggressive: remove all leading indentation ---
    if aggression == 3:
        text = re.sub(r"(?m)^[ \t]+", "", text)

    return text.strip()
 def compress_text(text: str, aggression: int) -> str:
     """
     Semantic compression via LLMlingua PromptCompressor.

     There is no local fallback: when the compressor cannot be obtained,
     the HTTPException (status 503) raised by ``_get_llmlingua()``
     propagates to the caller.

     Args:
         text: Prompt text to compress.
         aggression: Key into ``_TEXT_RATIOS`` (1, 2 or 3) selecting the
             target retention ratio passed to the compressor as ``rate``.

     Returns:
         The ``"compressed_prompt"`` entry of the compressor result, or
         *text* unchanged when the result has no such entry.

     Raises:
         KeyError: If *aggression* is not a key of ``_TEXT_RATIOS``.
         HTTPException: If the LLMlingua compressor is unavailable.
     """
     # Resolve the ratio first so an unknown level fails before the
     # (potentially expensive) lazy model load is attempted.
     ratio = _TEXT_RATIOS[aggression]
     compressor = _get_llmlingua()
     result = compressor.compress_prompt(
         text,
         rate=ratio,
         force_tokens=["\n"],     # preserve newline structure
     )
     return result.get("compressed_prompt", text)
 # ── Schemas ───────────────────────────────────────────────────────────────────
class Mode(str, Enum):
    """Compression strategy names; each member's value equals its name."""

    # Semantic compression of natural-language text.
    text = "text"
    # Regex-based stripping of comments and whitespace from source code.
    code = "code"
    # Regex-based stripping of timestamps, IPs and repeated lines from logs.
    logs = "logs"
 class CompressRequest(BaseModel):
+    text: str = Field(..., description="The raw text, code, or log to compress.")
+    mode: Mode = Field(Mode.text, description="Compression strategy: text | code | logs.")
     aggression_level: int = Field(
         2,
         ge=1,
         le=3,
+        description="1 = gentle, 2 = balanced, 3 = aggressive.",
     )
     original_tokens: int
     new_tokens: int
     tokens_saved: int
+    percent_saved: float = Field(..., description="Percentage of tokens removed.")
+    dollars_saved: float = Field(..., description="Estimated API cost delta in USD.")
+    mode: Mode
     aggression_level: int
 # ── Endpoints ─────────────────────────────────────────────────────────────────
 @app.get("/health", tags=["Health"])
 async def health_check():
+    """Check that the API is alive and responding."""
+    return {"status": "ok", "service": "promptzip-api"}
 @app.post("/api/tokenize", tags=["Tokenizer"])
 async def tokenize(body: dict):
+    """Count exact tokens and return estimated cost."""
     text = body.get("text", "")
     tokens = count_tokens(text)
     return {
 @app.post("/api/compress", response_model=CompressResponse, tags=["Compress"])
 async def compress(body: CompressRequest):
     """
+    Compress *text* using the chosen strategy:
+    - **logs** — regex strips timestamps, IPs, and repeating lines
+    - **code** — regex strips comments, docstrings, blank lines
+    - **text** — semantic compression via LLMlingua PromptCompressor
     """
     if not body.text.strip():
         raise HTTPException(status_code=400, detail="text must not be empty.")
+    dispatch = {
+        Mode.logs: compress_logs,
+        Mode.code: compress_code,
+        Mode.text: compress_text,
+    }
+    compressed = dispatch[body.mode](body.text, body.aggression_level)
     original_tokens = count_tokens(body.text)
     new_tokens = count_tokens(compressed)
         new_tokens=new_tokens,
         tokens_saved=saved,
         percent_saved=pct,
+        dollars_saved=round(estimate_cost(original_tokens) - estimate_cost(new_tokens), 6),
+        mode=body.mode,
         aggression_level=body.aggression_level,
     )