will702 committed on
Commit
84f612c
·
verified ·
1 Parent(s): 9f01dff

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +156 -168
app.py CHANGED
@@ -1,168 +1,156 @@
1
- 6 from fastapi import FastAPI, HTTPException, Request
2
- 7 from pydantic import BaseModel
3
- 8 -from transformers import AutoModelForCausalLM, AutoTokenizer
4
- 9 -import torch
5
- 8
6
- 9 MODEL_NAME = "Qwen/Qwen3.5-0.8B"
7
- 10 API_KEY = os.getenv("API_KEY")
8
- 11 +HF_TOKEN = os.getenv("HF_TOKEN")
9
- 12
10
- 14 -tokenizer = None
11
- 15 -model = None
12
- 13 +# Will hold either InferenceClient or local model+tokenizer
13
- 14 +inference_client = None
14
- 15 +local_model = None
15
- 16 +local_tokenizer = None
16
- 17
17
- 18
18
- 19 @asynccontextmanager
19
- 20 async def lifespan(app: FastAPI):
20
- 20 - global tokenizer, model
21
- 21 - print(f"Loading model: {MODEL_NAME}")
22
- 22 - tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
23
- 23 - model = AutoModelForCausalLM.from_pretrained(
24
- 24 - MODEL_NAME,
25
- 25 - torch_dtype=torch.float32, # CPU requires float32
26
- 26 - device_map="cpu",
27
- 27 - )
28
- 28 - model.eval()
29
- 29 - print("Model loaded.")
30
- 21 + global inference_client, local_model, local_tokenizer
31
- 22 +
32
- 23 + if HF_TOKEN:
33
- 24 + # Option 1: HF Inference API (GPU-backed, fast)
34
- 25 + print("HF_TOKEN found — using HF Inference API")
35
- 26 + from huggingface_hub import InferenceClient
36
- 27 + inference_client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN)
37
- 28 + print("Inference client ready.")
38
- 29 + else:
39
- 30 + # Option 2: Local model with INT8 quantization (CPU fallback)
40
- 31 + print("No HF_TOKEN — loading model locally with INT8 quantization")
41
- 32 + import torch
42
- 33 + from transformers import AutoModelForCausalLM, AutoTokenizer
43
- 34 + import torch.quantization
44
- 35 +
45
- 36 + local_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
46
- 37 + model = AutoModelForCausalLM.from_pretrained(
47
- 38 + MODEL_NAME,
48
- 39 + torch_dtype=torch.float32,
49
- 40 + device_map="cpu",
50
- 41 + )
51
- 42 + # Apply dynamic INT8 quantization for faster CPU inference
52
- 43 + local_model = torch.quantization.quantize_dynamic(
53
- 44 + model, {torch.nn.Linear}, dtype=torch.qint8
54
- 45 + )
55
- 46 + local_model.eval()
56
- 47 + print("Local INT8 model ready.")
57
- 48 +
58
- 49 yield
59
- 50
60
- 51
61
- ...
62
- 68
63
- 69
64
- 70 def parse_response(raw: str, texts: list[str]) -> list[dict]:
65
- 52 - # Strip thinking tags if present
66
- 71 raw = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
67
- 54 - # Extract JSON array
68
- 72 match = re.search(r"\[.*\]", raw, re.DOTALL)
69
- 73 if match:
70
- 74 try:
71
- ...
72
- 77 return parsed
73
- 78 except json.JSONDecodeError:
74
- 79 pass
75
- 63 - # Fallback: neutral for all
76
- 80 return [{"text": t, "sentiment": "neutral", "score": 0.5} for t in texts]
77
- 81
78
- 82
79
- 67 -@app.post("/predict")
80
- 68 -async def predict(body: PredictRequest, request: Request):
81
- 69 - if API_KEY:
82
- 70 - key = request.headers.get("X-API-Key")
83
- 71 - if key != API_KEY:
84
- 72 - raise HTTPException(status_code=401, detail="Invalid API key")
85
- 83 +def run_hf_api(texts: list[str]) -> str:
86
- 84 + messages = [
87
- 85 + {"role": "system", "content": SYSTEM_PROMPT},
88
- 86 + {"role": "user", "content": build_prompt(texts)},
89
- 87 + ]
90
- 88 + response = inference_client.chat_completion(
91
- 89 + messages=messages,
92
- 90 + max_tokens=512,
93
- 91 + temperature=0.1,
94
- 92 + )
95
- 93 + return response.choices[0].message.content
96
- 94
97
- 74 - texts = body.texts
98
- 75 - if not texts:
99
- 76 - raise HTTPException(status_code=400, detail="texts must not be empty")
100
- 77 - if len(texts) > 20:
101
- 78 - raise HTTPException(status_code=400, detail="Maximum 20 texts per request")
102
- 95
103
- 80 - if model is None or tokenizer is None:
104
- 81 - raise HTTPException(status_code=503, detail="Model not loaded yet")
105
- 82 -
106
- 96 +def run_local(texts: list[str]) -> str:
107
- 97 + import torch
108
- 98 messages = [
109
- 99 {"role": "system", "content": SYSTEM_PROMPT},
110
- 100 {"role": "user", "content": build_prompt(texts)},
111
- 101 ]
112
- 87 -
113
- 88 - text_input = tokenizer.apply_chat_template(
114
- 102 + text_input = local_tokenizer.apply_chat_template(
115
- 103 messages,
116
- 104 tokenize=False,
117
- 105 add_generation_prompt=True,
118
- 92 - enable_thinking=False, # Disable thinking for faster response
119
- 106 + enable_thinking=False,
120
- 107 )
121
- 94 - inputs = tokenizer(text_input, return_tensors="pt")
122
- 95 -
123
- 108 + inputs = local_tokenizer(text_input, return_tensors="pt")
124
- 109 with torch.no_grad():
125
- 97 - outputs = model.generate(
126
- 110 + outputs = local_model.generate(
127
- 111 **inputs,
128
- 112 max_new_tokens=512,
129
- 113 do_sample=False,
130
- 101 - pad_token_id=tokenizer.eos_token_id,
131
- 114 + pad_token_id=local_tokenizer.eos_token_id,
132
- 115 )
133
- 103 -
134
- 116 generated = outputs[0][inputs["input_ids"].shape[1]:]
135
- 105 - raw = tokenizer.decode(generated, skip_special_tokens=True)
136
- 117 + return local_tokenizer.decode(generated, skip_special_tokens=True)
137
- 118
138
- 119 +
139
- 120 +@app.post("/predict")
140
- 121 +async def predict(body: PredictRequest, request: Request):
141
- 122 + if API_KEY:
142
- 123 + key = request.headers.get("X-API-Key")
143
- 124 + if key != API_KEY:
144
- 125 + raise HTTPException(status_code=401, detail="Invalid API key")
145
- 126 +
146
- 127 + texts = body.texts
147
- 128 + if not texts:
148
- 129 + raise HTTPException(status_code=400, detail="texts must not be empty")
149
- 130 + if len(texts) > 20:
150
- 131 + raise HTTPException(status_code=400, detail="Maximum 20 texts per request")
151
- 132 +
152
- 133 + if inference_client is None and local_model is None:
153
- 134 + raise HTTPException(status_code=503, detail="Model not loaded yet")
154
- 135 +
155
- 136 + raw = run_hf_api(texts) if inference_client else run_local(texts)
156
- 137 results = parse_response(raw, texts)
157
- 138
158
- 109 - # Normalize output format
159
- 139 normalized = []
160
- 140 for r in results:
161
- 141 sentiment = str(r.get("sentiment", "neutral")).lower()
162
- ...
163
- 152
164
- 153 @app.get("/health")
165
- 154 def health():
166
- 126 - return {"status": "ok", "model_loaded": model is not None}
167
- 155 + mode = "hf_api" if inference_client else "local_int8" if local_model else "not_loaded"
168
- 156 + return {"status": "ok", "mode": mode}
 
1
+ import json
2
+ import os
3
+ import re
4
+ from contextlib import asynccontextmanager
5
+
6
+ from fastapi import FastAPI, HTTPException, Request
7
+ from pydantic import BaseModel
8
+
9
# Model served by this app; used for both the remote API and local load paths.
MODEL_NAME = "Qwen/Qwen3.5-0.8B"
# Optional shared secret; when set, /predict requires a matching X-API-Key header.
API_KEY = os.getenv("API_KEY")
# Optional Hugging Face token; when set, inference is delegated to the HF Inference API.
HF_TOKEN = os.getenv("HF_TOKEN")

# Will hold either InferenceClient or local model+tokenizer
# (populated by lifespan() at startup; exactly one backend becomes active).
inference_client = None
local_model = None
local_tokenizer = None
17
+
18
+
19
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Startup hook: initialize exactly one inference backend before serving.

    With HF_TOKEN set, a hosted Inference API client is created; otherwise the
    model is loaded locally on CPU and dynamically quantized to INT8.
    """
    global inference_client, local_model, local_tokenizer

    if HF_TOKEN:
        # Remote path: delegate generation to the hosted Inference API.
        print("HF_TOKEN found — using HF Inference API")
        from huggingface_hub import InferenceClient

        inference_client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN)
        print("Inference client ready.")
    else:
        # Local path: CPU inference, sped up with dynamic INT8 quantization.
        print("No HF_TOKEN — loading model locally with INT8 quantization")
        import torch
        import torch.quantization
        from transformers import AutoModelForCausalLM, AutoTokenizer

        local_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        base_model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,
            device_map="cpu",
        )
        # Rewrite Linear layers to INT8 kernels for faster CPU matmuls.
        quantized = torch.quantization.quantize_dynamic(
            base_model, {torch.nn.Linear}, dtype=torch.qint8
        )
        quantized.eval()
        local_model = quantized
        print("Local INT8 model ready.")

    yield
50
+
51
+
52
# FastAPI application; backend setup/teardown is handled by the lifespan hook.
app = FastAPI(title="StockPro Sentiment", lifespan=lifespan)
53
+
54
+
55
class PredictRequest(BaseModel):
    """Request body for /predict: a batch of headline strings (handler enforces 1-20 items)."""

    texts: list[str]
57
+
58
+
59
# Instructs the model to emit a bare JSON array; parse_response() depends on this shape.
SYSTEM_PROMPT = """You are a financial sentiment analyzer for Indonesian stock market news.
Analyze each headline and return ONLY a JSON array with no extra text.
Each item must have: "text" (original), "sentiment" ("positive", "negative", or "neutral"), "score" (0.0-1.0 confidence).
Respond only with the JSON array, no markdown, no explanation."""
63
+
64
+
65
def build_prompt(texts: list[str]) -> str:
    """Format *texts* as a 1-indexed numbered list inside the analysis instruction."""
    numbered = [f"{idx}. {headline}" for idx, headline in enumerate(texts, start=1)]
    return "Analyze sentiment for these Indonesian stock headlines:\n" + "\n".join(numbered)
68
+
69
+
70
def parse_response(raw: str, texts: list[str]) -> list[dict]:
    """Extract the JSON array from the model's raw output.

    Strips any <think>...</think> block first. If no array can be found,
    it fails to parse, or its length differs from *texts*, every headline
    falls back to a neutral entry with score 0.5.
    """
    fallback = [{"text": t, "sentiment": "neutral", "score": 0.5} for t in texts]
    cleaned = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
    found = re.search(r"\[.*\]", cleaned, re.DOTALL)
    if found is None:
        return fallback
    try:
        candidate = json.loads(found.group())
    except json.JSONDecodeError:
        return fallback
    if isinstance(candidate, list) and len(candidate) == len(texts):
        return candidate
    return fallback
81
+
82
+
83
def run_hf_api(texts: list[str]) -> str:
    """Run the sentiment prompt through the HF Inference API; return the raw model text."""
    chat = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": build_prompt(texts)},
    ]
    completion = inference_client.chat_completion(
        messages=chat,
        max_tokens=512,
        temperature=0.1,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
94
+
95
+
96
def run_local(texts: list[str]) -> str:
    """Run the sentiment prompt through the local quantized model; return decoded text."""
    import torch

    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": build_prompt(texts)},
    ]
    prompt = local_tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,  # skip <think> output to reduce latency
    )
    encoded = local_tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        generated_ids = local_model.generate(
            **encoded,
            max_new_tokens=512,
            do_sample=False,
            pad_token_id=local_tokenizer.eos_token_id,
        )
    # Drop the prompt tokens; decode only the newly generated continuation.
    prompt_len = encoded["input_ids"].shape[1]
    completion_ids = generated_ids[0][prompt_len:]
    return local_tokenizer.decode(completion_ids, skip_special_tokens=True)
118
+
119
+
120
@app.post("/predict")
async def predict(body: PredictRequest, request: Request):
    """Classify sentiment for up to 20 headlines.

    Auth: when API_KEY is configured, the X-API-Key header must match (401 otherwise).
    Validation: 400 on empty batch or more than 20 texts; 503 when no backend is loaded.
    Returns {"results": [{"text", "sentiment", "score"}, ...]} with sentiment in
    {"positive", "negative", "neutral"} and score clamped to [0.0, 1.0].
    """
    if API_KEY:
        key = request.headers.get("X-API-Key")
        if key != API_KEY:
            raise HTTPException(status_code=401, detail="Invalid API key")

    texts = body.texts
    if not texts:
        raise HTTPException(status_code=400, detail="texts must not be empty")
    if len(texts) > 20:
        raise HTTPException(status_code=400, detail="Maximum 20 texts per request")

    if inference_client is None and local_model is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet")

    raw = run_hf_api(texts) if inference_client else run_local(texts)
    # parse_response guarantees a list matching len(texts), but NOT that each
    # item is a dict with sane fields — the model may emit e.g. bare numbers.
    results = parse_response(raw, texts)

    normalized = []
    for original_text, r in zip(texts, results):
        if not isinstance(r, dict):
            # Malformed item from the model: degrade to neutral instead of a 500.
            r = {}
        sentiment = str(r.get("sentiment", "neutral")).lower()
        if sentiment not in ("positive", "negative", "neutral"):
            sentiment = "neutral"
        try:
            score = float(r.get("score", 0.5))
        except (TypeError, ValueError):
            # Non-numeric score (e.g. "high"): fall back to neutral confidence.
            score = 0.5
        score = min(max(score, 0.0), 1.0)  # clamp to documented 0.0-1.0 range
        normalized.append({
            # Echo the input headline when the model omitted/garbled "text".
            "text": r.get("text", original_text),
            "sentiment": sentiment,
            "score": round(score, 4),
        })

    return {"results": normalized}
151
+
152
+
153
@app.get("/health")
def health():
    """Liveness probe reporting which inference backend is active."""
    if inference_client:
        mode = "hf_api"
    elif local_model:
        mode = "local_int8"
    else:
        mode = "not_loaded"
    return {"status": "ok", "mode": mode}