Kush-Singh-26 committed
Commit · 945ac57
Parent(s): 36e0168

Deploy with full python image
Files changed:
- .gitattributes +1 -0
- Dockerfile +23 -0
- app/final_model.gguf +3 -0
- app/main.py +174 -0
- requirements.txt +4 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.gguf filter=lfs diff=lfs merge=lfs -text
Dockerfile
ADDED
@@ -0,0 +1,23 @@
+# Use FULL Python image (Not slim) to ensure pre-built wheels work
+FROM python:3.10
+
+WORKDIR /code
+
+# 1. Install Dependencies
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+# 2. Install llama-cpp-python (Pre-built Binary)
+# Because we are using the full python image, this should now work
+# without compiling with gcc, saving 15 minutes.
+RUN pip install llama-cpp-python \
+    --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
+
+# 3. Download NLTK Data
+RUN python -m nltk.downloader punkt punkt_tab vader_lexicon
+
+# 4. Copy App
+COPY ./app /code/app
+
+# 5. Start
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
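The CMD above serves the FastAPI app on port 7860, the port Hugging Face Spaces expects. If you want to smoke-test the image locally before pushing, a small stdlib-only poller like the sketch below can wait for the model to finish loading. This is only an illustration, not part of this commit; the tone-api image tag and the localhost URL are assumptions for a local docker build / docker run.

# Local smoke test (illustration only, not part of this commit).
# Assumes the image was built and started along the lines of:
#   docker build -t tone-api . && docker run -p 7860:7860 tone-api
import json
import time
import urllib.request

BASE_URL = "http://localhost:7860"  # assumption: container port mapped 1:1

def wait_until_online(timeout_s: float = 180.0) -> dict:
    """Poll GET / until the API answers or the timeout expires."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(f"{BASE_URL}/", timeout=5) as resp:
                return json.load(resp)
        except OSError:
            time.sleep(2)  # the GGUF model can take a while to load on CPU
    raise TimeoutError("API did not come online in time")

if __name__ == "__main__":
    # Expected: {"status": "online", "model": "Qwen-GGUF-Quantized"}
    print(wait_until_online())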
app/final_model.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:046cf2e43a39736a6cd25879dba9d04a9b9b20a6d6476ab1ce70c4d406f65a2f
+size 397807264
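This file is stored through Git LFS, so the repository only carries the small pointer above; the ~398 MB of model weights are fetched by LFS when the Space is built. A quick way to confirm that a checkout (or the built image) ended up with the real weights rather than the pointer stub is to compare the file against the pointer's size and SHA-256 oid, as in this hypothetical check:

# Hypothetical verification helper (not part of this commit).
import hashlib
from pathlib import Path

EXPECTED_OID = "046cf2e43a39736a6cd25879dba9d04a9b9b20a6d6476ab1ce70c4d406f65a2f"
EXPECTED_SIZE = 397807264  # bytes, from the LFS pointer above

def verify_gguf(path: str = "app/final_model.gguf") -> bool:
    p = Path(path)
    if p.stat().st_size != EXPECTED_SIZE:
        # A file of only a few hundred bytes usually means just the LFS
        # pointer was checked out, not the actual weights.
        return False
    sha = hashlib.sha256()
    with p.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            sha.update(chunk)
    return sha.hexdigest() == EXPECTED_OID

if __name__ == "__main__":
    print("model OK" if verify_gguf() else "model missing or still an LFS pointer")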
app/main.py
ADDED
@@ -0,0 +1,174 @@
+import os
+import time
+import nltk
+from nltk.sentiment import SentimentIntensityAnalyzer
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from contextlib import asynccontextmanager
+from llama_cpp import Llama
+
+# ==========================================
+# CONFIGURATION
+# ==========================================
+MODEL_PATH = "app/final_model.gguf"
+SENTIMENT_THRESHOLD = 0.05
+
+ml_resources = {}
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    print("⚡ Loading NLP tools...")
+
+    try:
+        nltk.data.find('tokenizers/punkt')
+    except LookupError:
+        nltk.download('punkt')
+        nltk.download('punkt_tab')
+        nltk.download('vader_lexicon')
+
+    ml_resources["analyzer"] = SentimentIntensityAnalyzer()
+
+    print(f"⚡ Loading Llama Model from {MODEL_PATH}...")
+    if not os.path.exists(MODEL_PATH):
+        print(f"❌ CRITICAL ERROR: Model not found at {MODEL_PATH}")
+    else:
+        try:
+            ml_resources["llm"] = Llama(
+                model_path=MODEL_PATH,
+                n_ctx=2048,
+                n_gpu_layers=0,
+                verbose=False
+            )
+            print("✅ Model loaded successfully!")
+        except Exception as e:
+            print(f"❌ Failed to load model: {e}")
+
+    yield
+    ml_resources.clear()
+
+app = FastAPI(lifespan=lifespan)
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+class ToneRequest(BaseModel):
+    text: str
+    style: str
+
+class ToneResponse(BaseModel):
+    original: str
+    transformed: str
+    latency_ms: float
+    changes_made: int
+
+def should_rewrite(sentence):
+    toxic_keywords = [
+        "garbage", "trash", "stupid", "dumb", "idiot", "hate", "terrible",
+        "horrible", "awful", "suck", "useless", "incompetent", "mess", "disaster"
+    ]
+    for word in toxic_keywords:
+        if word in sentence.lower():
+            return True, f"Keyword: {word}"
+
+    sia = ml_resources["analyzer"]
+    scores = sia.polarity_scores(sentence)
+    if scores['compound'] < SENTIMENT_THRESHOLD:
+        return True, f"Sentiment: {scores['compound']}"
+
+    return False, "Neutral"
+
+@app.post("/transform", response_model=ToneResponse)
+async def transform_text(request: ToneRequest):
+    if "llm" not in ml_resources:
+        raise HTTPException(status_code=503, detail="Model not loaded")
+
+    llm = ml_resources["llm"]
+    start_time = time.time()
+
+    # Split by newlines to preserve paragraphs
+    original_lines = request.text.split('\n')
+    final_text_blocks = []
+    changes_count = 0
+
+    print(f"\n--- Request ({request.style}) ---")
+
+    for line in original_lines:
+        if not line.strip():
+            final_text_blocks.append("")
+            continue
+
+        try:
+            sentences = nltk.sent_tokenize(line)
+        except Exception:  # fall back to treating the whole line as one sentence
+            sentences = [line]
+
+        line_rewrites = []
+
+        for sent in sentences:
+            clean_sent = sent.strip()
+            if not clean_sent: continue
+
+            needs_fix, reason = should_rewrite(clean_sent)
+
+            if needs_fix:
+                print(f"  ⚠️ REWRITING: '{clean_sent[:20]}...' -> {reason}")
+
+                system_prompt = (
+                    "You are a professional editor. Rewrite the input text to be polite and corporate. "
+                    "Keep the exact same meaning and speaker perspective."
+                )
+
+                if request.style.lower() == "casual":
+                    examples = [
+                        {"role": "user", "content": "Rewrite: This is garbage."},
+                        {"role": "assistant", "content": "This isn't really working for me."},
+                        {"role": "user", "content": "Rewrite: You are so lazy."},
+                        {"role": "assistant", "content": "I feel like we could be moving faster."}
+                    ]
+                else:
+                    examples = [
+                        {"role": "user", "content": "Rewrite: This design is garbage."},
+                        {"role": "assistant", "content": "The current design does not meet the requirements."},
+                        {"role": "user", "content": "Rewrite: I don't know why I hired you."},
+                        {"role": "assistant", "content": "I am concerned about the value being delivered."}
+                    ]
+
+                messages = [{"role": "system", "content": system_prompt}]
+                messages.extend(examples)
+                messages.append({"role": "user", "content": f"Rewrite: {clean_sent}"})
+
+                output = llm.create_chat_completion(
+                    messages=messages,
+                    temperature=0.1,
+                    max_tokens=128
+                )
+
+                new_text = output['choices'][0]['message']['content'].strip()
+                if "Rewrite:" in new_text: new_text = new_text.replace("Rewrite:", "").strip()
+
+                print(f"  -> {new_text}")
+                line_rewrites.append(new_text)
+                changes_count += 1
+            else:
+                line_rewrites.append(clean_sent)
+
+        final_text_blocks.append(" ".join(line_rewrites))
+
+    final_output = "\n".join(final_text_blocks)
+
+    return ToneResponse(
+        original=request.text,
+        transformed=final_output,
+        latency_ms=round((time.time() - start_time) * 1000, 2),
+        changes_made=changes_count
+    )
+
+@app.get("/")
+def read_root():
+    return {"status": "online", "model": "Qwen-GGUF-Quantized"}
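For reference, a call against the /transform endpoint defined above looks like the sketch below. It is stdlib-only and illustrative: the localhost base URL assumes the container from the Dockerfile is running locally (on the Space you would use the Space's own URL instead), and the sample text is made up.

# Example client for POST /transform (illustration only).
import json
import urllib.request

BASE_URL = "http://localhost:7860"  # assumption: API running locally

payload = {
    "text": "This design is garbage.\nThe rest of the document looks fine.",
    "style": "professional",  # any value other than "casual" uses the corporate few-shot examples
}
req = urllib.request.Request(
    f"{BASE_URL}/transform",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
# Generous timeout: each flagged sentence triggers a CPU-only llama.cpp completion.
with urllib.request.urlopen(req, timeout=300) as resp:
    body = json.load(resp)

print(body["transformed"])
print(f'{body["changes_made"]} sentence(s) rewritten in {body["latency_ms"]} ms')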
requirements.txt
ADDED
@@ -0,0 +1,4 @@
+fastapi
+uvicorn
+pydantic
+nltk