Update app.py
app.py CHANGED
@@ -1,126 +1,168 @@
 from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
 
 MODEL_NAME = "Qwen/Qwen3.5-0.8B"
 API_KEY = os.getenv("API_KEY")
+HF_TOKEN = os.getenv("HF_TOKEN")
 
-tokenizer = None
-model = None
+# Will hold either InferenceClient or local model+tokenizer
+inference_client = None
+local_model = None
+local_tokenizer = None
 
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    global tokenizer, model
-    print(f"Loading model: {MODEL_NAME}")
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_NAME,
-        torch_dtype=torch.float32,  # CPU requires float32
-        device_map="cpu",
-    )
-    model.eval()
-    print("Model loaded.")
+    global inference_client, local_model, local_tokenizer
+
+    if HF_TOKEN:
+        # Option 1: HF Inference API (GPU-backed, fast)
+        print("HF_TOKEN found — using HF Inference API")
+        from huggingface_hub import InferenceClient
+        inference_client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN)
+        print("Inference client ready.")
+    else:
+        # Option 2: Local model with INT8 quantization (CPU fallback)
+        print("No HF_TOKEN — loading model locally with INT8 quantization")
+        import torch
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+        import torch.quantization
+
+        local_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_NAME,
+            torch_dtype=torch.float32,
+            device_map="cpu",
+        )
+        # Apply dynamic INT8 quantization for faster CPU inference
+        local_model = torch.quantization.quantize_dynamic(
+            model, {torch.nn.Linear}, dtype=torch.qint8
+        )
+        local_model.eval()
+        print("Local INT8 model ready.")
+
     yield
 
 
...
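The CPU fallback added above relies on PyTorch dynamic quantization. Below is a minimal, self-contained sketch of that same call, using a small stand-in stack of nn.Linear layers instead of the actual causal LM; the layer sizes are arbitrary and only illustrate the mechanics.

# Sketch only: dynamic INT8 quantization on a toy model, not part of app.py.
import torch
import torch.nn as nn

fp32_model = nn.Sequential(nn.Linear(64, 64), nn.ReLU(), nn.Linear(64, 8))

# quantize_dynamic swaps every nn.Linear for an INT8 version: weights are
# stored as int8 and dequantized on the fly, activations stay float32.
int8_model = torch.quantization.quantize_dynamic(
    fp32_model, {nn.Linear}, dtype=torch.qint8
)
int8_model.eval()

with torch.no_grad():
    out = int8_model(torch.randn(1, 64))
print(out.shape)  # torch.Size([1, 8])

Dynamic quantization needs no calibration pass, which is why the lifespan hook can apply it directly to the freshly loaded model; the trade-off is that only the Linear weights shrink, so memory and latency gains on a small causal LM are modest.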
 
 
 def parse_response(raw: str, texts: list[str]) -> list[dict]:
-    # Strip thinking tags if present
     raw = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
-    # Extract JSON array
     match = re.search(r"\[.*\]", raw, re.DOTALL)
     if match:
         try:
...
             return parsed
         except json.JSONDecodeError:
             pass
-    # Fallback: neutral for all
     return [{"text": t, "sentiment": "neutral", "score": 0.5} for t in texts]
 
 
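For reference, parse_response strips any <think>...</think> block, pulls the first JSON array out of the reply, and falls back to neutral scores when parsing fails. A standalone sketch of that flow, assuming the elided lines load the regex match with json.loads:

# Illustration only: mirrors parse_response's regex handling on a sample reply.
import json
import re

raw = '<think>reasoning...</think> [{"text": "great", "sentiment": "positive", "score": 0.9}]'
cleaned = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
match = re.search(r"\[.*\]", cleaned, re.DOTALL)
parsed = json.loads(match.group(0)) if match else None
print(parsed[0]["sentiment"])  # positive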
-@app.post("/predict")
-async def predict(body: PredictRequest, request: Request):
-    if API_KEY:
-        key = request.headers.get("X-API-Key")
-        if key != API_KEY:
-            raise HTTPException(status_code=401, detail="Invalid API key")
+def run_hf_api(texts: list[str]) -> str:
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": build_prompt(texts)},
+    ]
+    response = inference_client.chat_completion(
+        messages=messages,
+        max_tokens=512,
+        temperature=0.1,
+    )
+    return response.choices[0].message.content
 
-    texts = body.texts
-    if not texts:
-        raise HTTPException(status_code=400, detail="texts must not be empty")
-    if len(texts) > 20:
-        raise HTTPException(status_code=400, detail="Maximum 20 texts per request")
 
-    if model is None or tokenizer is None:
-        raise HTTPException(status_code=503, detail="Model not loaded yet")
-
+def run_local(texts: list[str]) -> str:
+    import torch
     messages = [
         {"role": "system", "content": SYSTEM_PROMPT},
         {"role": "user", "content": build_prompt(texts)},
     ]
-
-    text_input = tokenizer.apply_chat_template(
+    text_input = local_tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True,
-        enable_thinking=False,  # Disable thinking for faster response
+        enable_thinking=False,
     )
-    inputs = tokenizer(text_input, return_tensors="pt")
-
+    inputs = local_tokenizer(text_input, return_tensors="pt")
     with torch.no_grad():
-        outputs = model.generate(
+        outputs = local_model.generate(
             **inputs,
             max_new_tokens=512,
             do_sample=False,
-            pad_token_id=tokenizer.eos_token_id,
+            pad_token_id=local_tokenizer.eos_token_id,
         )
-
     generated = outputs[0][inputs["input_ids"].shape[1]:]
-    raw = tokenizer.decode(generated, skip_special_tokens=True)
+    return local_tokenizer.decode(generated, skip_special_tokens=True)
 
+
+@app.post("/predict")
+async def predict(body: PredictRequest, request: Request):
+    if API_KEY:
+        key = request.headers.get("X-API-Key")
+        if key != API_KEY:
+            raise HTTPException(status_code=401, detail="Invalid API key")
+
+    texts = body.texts
+    if not texts:
+        raise HTTPException(status_code=400, detail="texts must not be empty")
+    if len(texts) > 20:
+        raise HTTPException(status_code=400, detail="Maximum 20 texts per request")
+
+    if inference_client is None and local_model is None:
+        raise HTTPException(status_code=503, detail="Model not loaded yet")
+
+    raw = run_hf_api(texts) if inference_client else run_local(texts)
     results = parse_response(raw, texts)
 
-    # Normalize output format
     normalized = []
     for r in results:
         sentiment = str(r.get("sentiment", "neutral")).lower()
...
 
 @app.get("/health")
 def health():
-    return {"status": "ok", "model_loaded": model is not None}
+    mode = "hf_api" if inference_client else "local_int8" if local_model else "not_loaded"
+    return {"status": "ok", "mode": mode}
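A hypothetical client call against the updated endpoints; the base URL is a placeholder for wherever this Space is served, and the exact JSON shape of the /predict response is not visible in this diff (the normalization loop is elided above), so the printout is unspecified.

# Sketch of calling the service from a client, not part of app.py.
import requests

BASE_URL = "https://example-space.hf.space"  # placeholder URL
headers = {"X-API-Key": "your-key"}  # only required if API_KEY is set server-side

resp = requests.post(
    f"{BASE_URL}/predict",
    json={"texts": ["I love this product", "Terrible support"]},  # at most 20 texts
    headers=headers,
)
print(resp.status_code, resp.json())

# /health now reports which backend was loaded: hf_api, local_int8, or not_loaded
print(requests.get(f"{BASE_URL}/health").json())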