will702 committed on
Commit
bcbf797
·
verified ·
1 Parent(s): c8eaebe

Upload 4 files

Browse files
Files changed (2) hide show
  1. app.py +29 -121
  2. requirements.txt +2 -3
app.py CHANGED
@@ -1,54 +1,32 @@
1
- import json
2
  import os
3
- import re
4
  from contextlib import asynccontextmanager
5
 
6
-
7
  from fastapi import FastAPI, HTTPException, Request
8
  from pydantic import BaseModel
 
9
 
10
- MODEL_NAME = "Qwen/Qwen3.5-9B:together"
11
  API_KEY = os.getenv("API_KEY")
12
- HF_TOKEN = os.getenv("HF_TOKEN")
13
 
14
- # Will hold either InferenceClient or local model+tokenizer
15
- inference_client = None
16
- local_model = None
17
- local_tokenizer = None
 
 
 
 
 
 
 
18
 
19
 
20
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: initialize the model backend at startup.

    Prefers the HF Inference API when HF_TOKEN is set; otherwise falls back
    to loading the model locally on CPU with dynamic INT8 quantization.
    Sets exactly one of `inference_client` or `local_model`/`local_tokenizer`.
    """
    global inference_client, local_model, local_tokenizer

    if HF_TOKEN:
        # Option 1: HF Inference API (GPU-backed, fast)
        print("HF_TOKEN found — using HF Inference API")
        from huggingface_hub import InferenceClient
        inference_client = InferenceClient(
            api_key=HF_TOKEN,
        )
        print("Inference client ready.")
    else:
        # Option 2: Local model with INT8 quantization (CPU fallback)
        print("No HF_TOKEN — loading model locally with INT8 quantization")
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer
        import torch.quantization

        local_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Load in full float32 on CPU first; quantization is applied afterwards.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,
            device_map="cpu",
        )
        # Apply dynamic INT8 quantization for faster CPU inference
        local_model = torch.quantization.quantize_dynamic(
            model, {torch.nn.Linear}, dtype=torch.qint8
        )
        local_model.eval()
        print("Local INT8 model ready.")

    # Hand control back to FastAPI; app serves requests until shutdown.
    yield
53
 
54
 
@@ -59,72 +37,6 @@ class PredictRequest(BaseModel):
59
  texts: list[str]
60
 
61
 
62
SYSTEM_PROMPT = "Indonesian stock news sentiment analyzer. Return ONLY a JSON array of [sentiment, score] pairs where sentiment is positive/negative/neutral and score is 0.0-1.0. No markdown, no explanation."


def build_prompt(texts: list[str]) -> str:
    """Render the headlines as a 1-based numbered list ending in /no_think."""
    numbered = [f"{idx}. {headline}" for idx, headline in enumerate(texts, start=1)]
    return "\n".join(numbered) + "\n/no_think"
68
-
69
-
70
- def parse_response(raw: str, texts: list[str]) -> list[dict]:
71
- raw = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
72
- match = re.search(r"\[.*\]", raw, re.DOTALL)
73
- if match:
74
- try:
75
- parsed = json.loads(match.group())
76
- # Index-based: [[sentiment, score], ...]
77
- if isinstance(parsed, list) and len(parsed) == len(texts):
78
- if isinstance(parsed[0], list):
79
- return [
80
- {"text": texts[i], "sentiment": parsed[i][0], "score": parsed[i][1]}
81
- for i in range(len(texts))
82
- ]
83
- # Fallback: old object format still works
84
- return parsed
85
- except (json.JSONDecodeError, IndexError, KeyError):
86
- pass
87
- return [{"text": t, "sentiment": "neutral", "score": 0.5} for t in texts]
88
-
89
-
90
- def run_hf_api(texts: list[str]) -> str:
91
- messages = [
92
- {"role": "system", "content": SYSTEM_PROMPT},
93
- {"role": "user", "content": build_prompt(texts)},
94
- ]
95
- response = inference_client.chat.completions.create(
96
- model=MODEL_NAME,
97
- messages=messages,
98
- max_tokens=256,
99
- temperature=0.1,
100
- )
101
- return response.choices[0].message.content or ""
102
-
103
-
104
- def run_local(texts: list[str]) -> str:
105
- import torch
106
- messages = [
107
- {"role": "system", "content": SYSTEM_PROMPT},
108
- {"role": "user", "content": build_prompt(texts)},
109
- ]
110
- text_input = local_tokenizer.apply_chat_template(
111
- messages,
112
- tokenize=False,
113
- add_generation_prompt=True,
114
- enable_thinking=False,
115
- )
116
- inputs = local_tokenizer(text_input, return_tensors="pt")
117
- with torch.no_grad():
118
- outputs = local_model.generate(
119
- **inputs,
120
- max_new_tokens=256,
121
- do_sample=False,
122
- pad_token_id=local_tokenizer.eos_token_id,
123
- )
124
- generated = outputs[0][inputs["input_ids"].shape[1]:]
125
- return local_tokenizer.decode(generated, skip_special_tokens=True)
126
-
127
-
128
  @app.post("/predict")
129
  async def predict(body: PredictRequest, request: Request):
130
  if API_KEY:
@@ -138,27 +50,23 @@ async def predict(body: PredictRequest, request: Request):
138
  if len(texts) > 20:
139
  raise HTTPException(status_code=400, detail="Maximum 20 texts per request")
140
 
141
- if inference_client is None and local_model is None:
142
  raise HTTPException(status_code=503, detail="Model not loaded yet")
143
 
144
- raw = run_hf_api(texts) if inference_client else run_local(texts)
145
- results = parse_response(raw, texts)
146
-
147
- normalized = []
148
- for r in results:
149
- sentiment = str(r.get("sentiment", "neutral")).lower()
150
- if sentiment not in ("positive", "negative", "neutral"):
151
- sentiment = "neutral"
152
- normalized.append({
153
- "text": r.get("text", ""),
154
- "sentiment": sentiment,
155
- "score": round(float(r.get("score", 0.5)), 4),
156
  })
157
 
158
- return {"results": normalized, "_raw": raw}
159
 
160
 
161
@app.get("/health")
def health():
    # Report which backend is active: HF Inference API, local INT8 model,
    # or neither (startup not finished / both failed to initialize).
    mode = "hf_api" if inference_client else "local_int8" if local_model else "not_loaded"
    return {"status": "ok", "mode": mode}
 
 
1
  import os
 
2
  from contextlib import asynccontextmanager
3
 
 
4
  from fastapi import FastAPI, HTTPException, Request
5
  from pydantic import BaseModel
6
+ from transformers import pipeline
7
 
8
+ MODEL_NAME = "ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa"
9
  API_KEY = os.getenv("API_KEY")
 
10
 
11
# Label mapping IndoNLU SMSA: positive/neutral/negative
# Keys cover both human-readable labels and generic "label_N" ids so the
# mapping works regardless of how the checkpoint's id2label is configured.
# NOTE(review): label_0/1/2 -> positive/neutral/negative assumes this
# checkpoint's class order — confirm against the model's config.json.
LABEL_MAP = {
    "positive": "positive",
    "neutral": "neutral",
    "negative": "negative",
    "label_0": "positive",
    "label_1": "neutral",
    "label_2": "negative",
}

# Set by lifespan(); stays None until the transformers pipeline has loaded.
classifier = None
22
 
23
 
24
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: load the sentiment pipeline once at startup.

    Populates the module-level `classifier` so request handlers can use it;
    /predict returns 503 until this completes.
    """
    global classifier
    print(f"Loading model: {MODEL_NAME}")
    classifier = pipeline("text-classification", model=MODEL_NAME)
    print("Model loaded.")
    # Hand control back to FastAPI; app serves requests until shutdown.
    yield
31
 
32
 
 
37
  texts: list[str]
38
 
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  @app.post("/predict")
41
  async def predict(body: PredictRequest, request: Request):
42
  if API_KEY:
 
50
  if len(texts) > 20:
51
  raise HTTPException(status_code=400, detail="Maximum 20 texts per request")
52
 
53
+ if classifier is None:
54
  raise HTTPException(status_code=503, detail="Model not loaded yet")
55
 
56
+ predictions = classifier(texts, truncation=True, max_length=512)
57
+
58
+ results = []
59
+ for text, pred in zip(texts, predictions):
60
+ label = LABEL_MAP.get(pred["label"].lower(), "neutral")
61
+ results.append({
62
+ "text": text,
63
+ "sentiment": label,
64
+ "score": round(pred["score"], 4),
 
 
 
65
  })
66
 
67
+ return {"results": results}
68
 
69
 
70
  @app.get("/health")
71
  def health():
72
+ return {"status": "ok", "model_loaded": classifier is not None}
 
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
  fastapi==0.115.5
2
  uvicorn[standard]==0.32.1
3
- transformers
4
- accelerate>=0.26.0
5
- huggingface_hub
 
1
  fastapi==0.115.5
2
  uvicorn[standard]==0.32.1
3
+ transformers>=4.51.0
4
+ torch==2.5.1