viskav committed on
Commit
939d683
·
verified ·
1 Parent(s): 533de3d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -38
app.py CHANGED
@@ -1,31 +1,50 @@
1
  from fastapi import FastAPI, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel, Field
4
- from llama_cpp import Llama
5
  from contextlib import asynccontextmanager
6
  import re
 
 
 
 
 
 
7
 
8
  MODEL_REPO = "bartowski/Phi-3.5-mini-instruct-GGUF"
9
  MODEL_FILE = "Phi-3.5-mini-instruct-Q4_K_M.gguf"
10
 
11
- llm = None # global reference
12
 
13
  @asynccontextmanager
14
  async def lifespan(app: FastAPI):
15
  global llm
16
- print("πŸš€ Loading Phi-3.5 Mini (Fast Summarizer)...")
17
- llm = Llama.from_pretrained(
18
- repo_id=MODEL_REPO,
19
- filename=MODEL_FILE,
20
- n_threads=4,
21
- n_ctx=2048,
22
- n_batch=256,
23
- n_gpu_layers=0,
24
- verbose=False,
25
- )
26
- print("βœ… Model loaded")
 
 
 
 
 
 
 
 
 
 
 
27
  yield
 
28
  print("πŸ›‘ Shutting down...")
 
 
29
 
30
  app = FastAPI(
31
  title="AI Summarizer",
@@ -43,7 +62,7 @@ app.add_middleware(
43
 
44
  class SummarizeRequest(BaseModel):
45
  text: str = Field(..., min_length=1, max_length=2000)
46
- length: str = "short"
47
 
48
  LENGTH_INSTRUCTIONS = {
49
  "short": "Summarize in 2–3 concise sentences.",
@@ -52,6 +71,7 @@ LENGTH_INSTRUCTIONS = {
52
  }
53
 
54
  def clean_output(text: str) -> str:
 
55
  text = re.sub(r"<\|.*?\|>", "", text)
56
  text = re.sub(r"\s+", " ", text)
57
  return text.strip()
@@ -59,36 +79,81 @@ def clean_output(text: str) -> str:
59
  @app.post("/api/summarize")
60
  async def summarize(req: SummarizeRequest):
61
  if llm is None:
62
- raise HTTPException(status_code=503, detail="Model not loaded")
63
-
64
- text = req.text.strip()
65
- length_instruction = LENGTH_INSTRUCTIONS.get(req.length, LENGTH_INSTRUCTIONS["short"])
66
-
67
- prompt = f"""
68
- <|user|>
 
 
 
 
 
 
69
  You are an expert text summarizer.
70
  {length_instruction}
71
 
72
  Text:
73
  {text}
74
  <|end|>
75
- <|assistant|>
76
- """
77
-
78
- output = llm(
79
- prompt,
80
- max_tokens=140 if req.length == "short" else 220,
81
- temperature=0.3,
82
- top_p=0.9,
83
- top_k=40,
84
- repeat_penalty=1.05,
85
- stop=["<|end|>", "<|user|>"],
86
- )
87
-
88
- summary = clean_output(output["choices"][0]["text"])
89
- return {"summary": summary, "success": True}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  @app.get("/")
92
  def health():
93
- return {"status": "ok", "model": MODEL_FILE}
94
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import FastAPI, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel, Field
 
4
  from contextlib import asynccontextmanager
5
  import re
6
+ import os
7
+
8
# llama-cpp-python is a hard dependency: fail fast at import time with a
# clear remediation message instead of a NameError later at model load.
try:
    from llama_cpp import Llama
except ImportError as e:
    # Chain the original exception so the real import failure (e.g. a
    # broken wheel vs. a missing package) stays visible in the traceback.
    raise ImportError(
        "Install llama-cpp-python: pip install llama-cpp-python"
    ) from e
12
 
13
# GGUF build of Phi-3.5 Mini Instruct, fetched from the Hugging Face Hub.
MODEL_REPO = "bartowski/Phi-3.5-mini-instruct-GGUF"
# Q4_K_M quantization of the model (~2GB on disk, per the startup notes).
MODEL_FILE = "Phi-3.5-mini-instruct-Q4_K_M.gguf"

# Global model handle; set by the lifespan handler at startup,
# None while unloaded (endpoints answer 503 in that state).
llm = None
17
 
18
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the GGUF model at startup and release it at shutdown.

    On load failure the app still starts: ``llm`` stays None and the
    summarize endpoint answers 503 until the problem is fixed.
    """
    global llm
    try:
        print("🚀 Loading Phi-3.5 Mini (Fast Summarizer)...")

        # Download (if needed) and load the model with error handling.
        llm = Llama.from_pretrained(
            repo_id=MODEL_REPO,
            filename=MODEL_FILE,
            n_threads=4,      # CPU inference; tune to available cores
            n_ctx=2048,       # context window in tokens
            n_batch=256,
            n_gpu_layers=0,   # force pure-CPU execution
            verbose=False,
        )
        print("✅ Model loaded successfully")
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        print("Make sure you have:")
        print("1. Installed llama-cpp-python")
        print("2. Have internet connection for model download")
        print("3. Have sufficient disk space (~2GB)")
        llm = None

    yield

    print("🛑 Shutting down...")
    # BUG FIX: the original did `del llm`, which removes the *global*
    # binding (this function declares `global llm`); any later reference
    # — e.g. a final health probe during shutdown — would then raise
    # NameError. Rebinding to None drops the model reference safely.
    llm = None
48
 
49
  app = FastAPI(
50
  title="AI Summarizer",
 
62
 
63
class SummarizeRequest(BaseModel):
    """Request body for POST /api/summarize."""
    # Text to summarize: required, 1–2000 characters.
    text: str = Field(..., min_length=1, max_length=2000)
    # Summary size preset; regex-validated to the three supported values.
    length: str = Field(default="short", pattern="^(short|medium|long)$")
66
 
67
  LENGTH_INSTRUCTIONS = {
68
  "short": "Summarize in 2–3 concise sentences.",
 
71
  }
72
 
73
def clean_output(text: str) -> str:
    """Strip chat-template special tokens and normalize whitespace.

    Removes any ``<|...|>`` markers the model may echo, collapses all
    runs of whitespace to single spaces, and trims the ends.
    """
    without_tokens = re.sub(r"<\|.*?\|>", "", text)
    single_spaced = re.sub(r"\s+", " ", without_tokens)
    return single_spaced.strip()
 
79
@app.post("/api/summarize")
async def summarize(req: SummarizeRequest):
    """Summarize ``req.text`` with the loaded model.

    Returns ``{"summary", "success", "length"}``.
    Raises 503 if the model is not loaded, 500 on empty model output
    or any inference failure.
    """
    if llm is None:
        raise HTTPException(
            status_code=503,
            detail="Model not loaded. Check server logs for errors."
        )

    try:
        text = req.text.strip()
        length_instruction = LENGTH_INSTRUCTIONS.get(
            req.length,
            LENGTH_INSTRUCTIONS["short"]
        )

        # Phi-3.5 chat template: <|user|> ... <|end|> <|assistant|>
        prompt = f"""<|user|>
You are an expert text summarizer.
{length_instruction}

Text:
{text}
<|end|>
<|assistant|>"""

        # Token budget per requested summary length.
        max_tokens_map = {
            "short": 140,
            "medium": 220,
            "long": 300
        }

        output = llm(
            prompt,
            max_tokens=max_tokens_map.get(req.length, 140),
            temperature=0.3,   # low temperature for faithful summaries
            top_p=0.9,
            top_k=40,
            repeat_penalty=1.05,
            stop=["<|end|>", "<|user|>"],
            echo=False
        )

        summary = clean_output(output["choices"][0]["text"])

        if not summary:
            raise HTTPException(
                status_code=500,
                detail="Model produced empty output"
            )

        return {
            "summary": summary,
            "success": True,
            "length": req.length
        }

    except HTTPException:
        # BUG FIX: the blanket `except Exception` below used to catch the
        # deliberate empty-output HTTPException raised above and re-wrap
        # it as a generic "Summarization error". Let our own HTTP errors
        # propagate untouched.
        raise
    except Exception as e:
        # Chain the cause so the real failure survives in the traceback.
        raise HTTPException(
            status_code=500,
            detail=f"Summarization error: {e}"
        ) from e
+ )
139
 
140
@app.get("/")
def health():
    """Root liveness probe: reports whether the model is ready."""
    model_ready = llm is not None
    status_label = "ok" if llm else "model_not_loaded"
    return {
        "status": status_label,
        "model": MODEL_FILE,
        "ready": model_ready,
    }
147
+
148
@app.get("/health")
def detailed_health():
    """Verbose health endpoint including model identity details."""
    is_loaded = llm is not None
    state = "healthy" if llm else "unhealthy"
    return {
        "status": state,
        "model_loaded": is_loaded,
        "model_name": MODEL_FILE,
        "repo": MODEL_REPO,
    }
156
+
157
# Local development entry point: serves on all interfaces, port 8000.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)