Kush-Singh-26 committed on
Commit
945ac57
·
1 Parent(s): 36e0168

Deploy with full python image

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. Dockerfile +23 -0
  3. app/final_model.gguf +3 -0
  4. app/main.py +174 -0
  5. requirements.txt +4 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.gguf filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use FULL Python image (Not slim) to ensure pre-built wheels work
FROM python:3.10

WORKDIR /code

# 1. Install Dependencies
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# 2. Install llama-cpp-python (Pre-built Binary)
# Because we are using the full python image, this should now work
# without compiling gcc, saving 15 minutes.
# --no-cache-dir added for consistency with the install above: the pip
# wheel cache is dead weight baked into the final image layer.
RUN pip install --no-cache-dir llama-cpp-python \
    --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu

# 3. Download NLTK Data
# Data lands in the default search path (/root/nltk_data when built as root).
# NOTE(review): if this container is ever run as a non-root user, set
# NLTK_DATA to a world-readable directory — confirm the runtime user.
RUN python -m nltk.downloader punkt punkt_tab vader_lexicon

# 4. Copy App
COPY ./app /code/app

# 5. Start
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/final_model.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:046cf2e43a39736a6cd25879dba9d04a9b9b20a6d6476ab1ce70c4d406f65a2f
3
+ size 397807264
app/main.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import time
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from contextlib import asynccontextmanager
from llama_cpp import Llama

# ==========================================
# CONFIGURATION
# ==========================================
# Path to the quantized GGUF model, relative to the process working
# directory (/code in the Docker image), i.e. /code/app/final_model.gguf.
MODEL_PATH = "app/final_model.gguf"
# VADER compound-score cutoff: sentences scoring below this are rewritten.
SENTIMENT_THRESHOLD = 0.05

# Process-wide registry for heavyweight NLP objects ("analyzer", "llm"),
# populated once in the lifespan handler and cleared on shutdown.
ml_resources = {}
18
+
19
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load NLP resources at startup and release them on shutdown.

    Populates ``ml_resources`` with:
      * ``"analyzer"`` – an NLTK VADER SentimentIntensityAnalyzer
      * ``"llm"``      – a llama.cpp model (only if the GGUF file loads)

    A missing or broken model is logged but does not abort startup; the
    /transform endpoint answers 503 while ``"llm"`` is absent.
    """
    print("⚡ Loading NLP tools...")

    # Probe each corpus independently. The previous code only probed for
    # 'punkt', so a host that had punkt but lacked vader_lexicon (or
    # punkt_tab) would skip the downloads and crash further below.
    for package, resource in (
        ("punkt", "tokenizers/punkt"),
        ("punkt_tab", "tokenizers/punkt_tab"),
        # vader_lexicon stays zipped on disk, so probe the .zip path.
        ("vader_lexicon", "sentiment/vader_lexicon.zip"),
    ):
        try:
            nltk.data.find(resource)
        except LookupError:
            nltk.download(package)

    ml_resources["analyzer"] = SentimentIntensityAnalyzer()

    print(f"⚡ Loading Llama Model from {MODEL_PATH}...")
    if not os.path.exists(MODEL_PATH):
        print(f"❌ CRITICAL ERROR: Model not found at {MODEL_PATH}")
    else:
        try:
            ml_resources["llm"] = Llama(
                model_path=MODEL_PATH,
                n_ctx=2048,        # context window, in tokens
                n_gpu_layers=0,    # CPU-only inference
                verbose=False
            )
            print("✅ Model loaded successfully!")
        except Exception as e:
            # Best-effort: keep the API up even if the model is unusable.
            print(f"❌ Failed to load model: {e}")

    yield
    # Shutdown: drop references so the model memory can be reclaimed.
    ml_resources.clear()
49
+
50
# Wire the startup/shutdown hooks into the application.
app = FastAPI(lifespan=lifespan)

# Wide-open CORS: the API is intended to be callable from arbitrary
# browser origins (e.g. a separately hosted frontend).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
59
+
60
class ToneRequest(BaseModel):
    """Request payload for POST /transform."""
    # Free-form input text; may contain multiple newline-separated lines.
    text: str
    # Target register: "casual" selects the informal few-shot examples,
    # anything else falls back to the corporate ones.
    style: str
63
+
64
class ToneResponse(BaseModel):
    """Response payload for POST /transform."""
    # The input text, echoed back unchanged.
    original: str
    # The rewritten text (line/paragraph structure preserved).
    transformed: str
    # Wall-clock processing time in milliseconds, rounded to 2 places.
    latency_ms: float
    # Number of sentences the LLM actually rewrote.
    changes_made: int
69
+
70
def should_rewrite(sentence):
    """Decide whether *sentence* should be rewritten by the LLM.

    Returns a ``(flag, reason)`` pair: the flag says whether a rewrite
    is needed and the reason is a short human-readable explanation
    (the matched keyword, or the VADER compound score).
    """
    toxic_keywords = [
        "garbage", "trash", "stupid", "dumb", "idiot", "hate", "terrible",
        "horrible", "awful", "suck", "useless", "incompetent", "mess", "disaster"
    ]

    # Fast path: case-insensitive substring match against the blocklist
    # (first hit in list order wins, as before).
    lowered = sentence.lower()
    match = next((word for word in toxic_keywords if word in lowered), None)
    if match is not None:
        return True, f"Keyword: {match}"

    # Slow path: VADER sentiment scoring.
    # NOTE(review): compound < +0.05 also flags perfectly neutral text
    # (compound == 0.0) for rewriting — confirm that is intended; the
    # conventional "negative" cutoff is compound <= -0.05.
    scores = ml_resources["analyzer"].polarity_scores(sentence)
    if scores['compound'] < SENTIMENT_THRESHOLD:
        return True, f"Sentiment: {scores['compound']}"

    return False, "Neutral"
85
+
86
@app.post("/transform", response_model=ToneResponse)
async def transform_text(request: ToneRequest):
    """Rewrite negative/toxic sentences in ``request.text``.

    The text is split into lines (to preserve paragraph structure) and
    each line into sentences; sentences flagged by ``should_rewrite``
    are rephrased by the local llama.cpp model using style-dependent
    few-shot examples. Unflagged sentences pass through (stripped).

    Raises:
        HTTPException: 503 if the GGUF model failed to load at startup.
    """
    if "llm" not in ml_resources:
        raise HTTPException(status_code=503, detail="Model not loaded")

    llm = ml_resources["llm"]
    start_time = time.time()

    # Build the invariant part of the chat prompt ONCE per request
    # instead of once per sentence — it depends only on request.style.
    system_prompt = (
        "You are a professional editor. Rewrite the input text to be polite and corporate. "
        "Keep the exact same meaning and speaker perspective."
    )
    if request.style.lower() == "casual":
        examples = [
            {"role": "user", "content": "Rewrite: This is garbage."},
            {"role": "assistant", "content": "This isn't really working for me."},
            {"role": "user", "content": "Rewrite: You are so lazy."},
            {"role": "assistant", "content": "I feel like we could be moving faster."}
        ]
    else:
        examples = [
            {"role": "user", "content": "Rewrite: This design is garbage."},
            {"role": "assistant", "content": "The current design does not meet the requirements."},
            {"role": "user", "content": "Rewrite: I don't know why I hired you."},
            {"role": "assistant", "content": "I am concerned about the value being delivered."}
        ]
    base_messages = [{"role": "system", "content": system_prompt}] + examples

    # Split by newlines to preserve paragraphs
    original_lines = request.text.split('\n')
    final_text_blocks = []
    changes_count = 0

    print(f"\n--- Request ({request.style}) ---")

    for line in original_lines:
        if not line.strip():
            # Keep blank lines so the output paragraphing matches the input.
            final_text_blocks.append("")
            continue

        try:
            sentences = nltk.sent_tokenize(line)
        except LookupError:
            # Tokenizer data missing — treat the whole line as one
            # sentence. (Was a bare `except:`, which also swallowed
            # SystemExit/KeyboardInterrupt; narrowed to the actual
            # failure mode of sent_tokenize.)
            sentences = [line]

        line_rewrites = []

        for sent in sentences:
            clean_sent = sent.strip()
            if not clean_sent:
                continue

            needs_fix, reason = should_rewrite(clean_sent)

            if needs_fix:
                print(f" ⚠️ REWRITING: '{clean_sent[:20]}...' -> {reason}")

                # Per-sentence part of the prompt: just the user turn.
                messages = base_messages + [
                    {"role": "user", "content": f"Rewrite: {clean_sent}"}
                ]

                output = llm.create_chat_completion(
                    messages=messages,
                    temperature=0.1,   # near-deterministic rewrites
                    max_tokens=128
                )

                new_text = output['choices'][0]['message']['content'].strip()
                # The model sometimes echoes the instruction prefix.
                if "Rewrite:" in new_text:
                    new_text = new_text.replace("Rewrite:", "").strip()

                print(f" -> {new_text}")
                line_rewrites.append(new_text)
                changes_count += 1
            else:
                line_rewrites.append(clean_sent)

        final_text_blocks.append(" ".join(line_rewrites))

    final_output = "\n".join(final_text_blocks)

    return ToneResponse(
        original=request.text,
        transformed=final_output,
        latency_ms=round((time.time() - start_time) * 1000, 2),
        changes_made=changes_count
    )
171
+
172
@app.get("/")
def read_root():
    """Health-check endpoint: reports service status and model name."""
    payload = dict(status="online", model="Qwen-GGUF-Quantized")
    return payload
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# Web framework + ASGI server for app/main.py
fastapi
uvicorn
# Request/response models (ToneRequest / ToneResponse)
pydantic
# Sentence tokenization + VADER sentiment analysis
nltk
# NOTE: llama-cpp-python is installed separately in the Dockerfile from
# its prebuilt CPU wheel index.