File size: 2,564 Bytes
70ca8e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import uvicorn
import os

app = FastAPI()

# Load the causal language model and its tokenizer once, at import time.
# Both objects are reused by the request handlers defined later in this file.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Hand the already-loaded model object to the pipeline. Passing the model
# *name* here would make the pipeline load a second copy of the same weights.
generator_pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
# Separate model used when a request asks for mode == "summarize".
summarizer_pipe = pipeline("summarization", model="facebook/bart-large-cnn")

# Expose the local "static" directory under the /static URL prefix.
app.mount("/static", StaticFiles(directory="static"), name="static")

class GenRequest(BaseModel):
    # Request body accepted by the POST endpoints in this file.
    # Input text: the prompt to continue, the passage to summarize, or the
    # context for next-token prediction, depending on the endpoint and mode.
    text: str
    # Length budget: forwarded as max_new_tokens to the text-generation
    # pipeline and as max_length to the summarization pipeline.
    max_new_tokens: int = 150
    # Forwarded unchanged to the pipelines' do_sample argument.
    do_sample: bool = False
    # Only "summarize" is checked explicitly by /generate; every other value
    # takes the text-generation path.
    mode: str = "generate"  # "generate" or "summarize"

@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the front-end page.

    Returns:
        The contents of ``templates/index.html`` as a string, sent to the
        client as an HTML response. The path is relative, so it is resolved
        against the process working directory.
    """
    # Decode explicitly as UTF-8: without an encoding argument, open() falls
    # back to the platform's locale encoding, which can garble or reject
    # non-ASCII markup on systems whose default is not UTF-8.
    with open("templates/index.html", "r", encoding="utf-8") as f:
        return f.read()

@app.post("/generate")
def generate(req: GenRequest):
    """Summarize or continue ``req.text``, depending on ``req.mode``.

    Args:
        req: Request body. ``mode == "summarize"`` selects the summarization
            pipeline; any other value selects the text-generation pipeline.

    Returns:
        A dict with a single ``"generated_text"`` key holding the model
        output (the summary, or the continuation without the prompt).
    """
    if req.mode == "summarize":
        # The request's max_new_tokens is passed as the summarizer's max_length.
        max_length = req.max_new_tokens
        out = summarizer_pipe(
            req.text,
            max_length=max_length,
            # Keep the lower bound at or below the upper bound so a small
            # requested budget does not produce contradictory constraints.
            min_length=min(30, max_length),
            do_sample=req.do_sample,
            # Clip over-long inputs to the model's input limit, matching
            # what the text-generation branch already does.
            truncation=True,
        )
        return {"generated_text": out[0]["summary_text"]}

    # Default path: plain text generation; return_full_text=False drops the
    # prompt from the returned string.
    out = generator_pipe(
        req.text,
        max_new_tokens=req.max_new_tokens,
        do_sample=req.do_sample,
        truncation=True,
        return_full_text=False,
    )
    return {"generated_text": out[0]["generated_text"]}

@app.post("/predict_next")
def predict_next(req: GenRequest):
    """Return the most likely next tokens following ``req.text``.

    Only ``req.text`` is read; the other request fields are ignored.

    Args:
        req: Request body whose ``text`` is used as the context.

    Returns:
        ``{"predictions": [...]}`` where each entry has a ``"token"``
        (decoded string) and a ``"probability"`` (percentage rounded to two
        decimals), most likely first. The list is empty when ``req.text``
        tokenizes to no tokens.
    """
    top_k = 10
    inputs = tokenizer(req.text, return_tensors="pt")

    # With no input tokens there is no last position to read logits from;
    # indexing [-1] on the empty sequence axis would raise instead.
    if inputs["input_ids"].shape[-1] == 0:
        return {"predictions": []}

    with torch.no_grad():
        # Logits at the final position of the first (only) sequence.
        next_token_logits = model(**inputs).logits[0, -1, :]
        probs = torch.softmax(next_token_logits, dim=-1)
        # Never ask topk for more entries than the vocabulary holds.
        top_probs, top_indices = torch.topk(probs, min(top_k, probs.shape[-1]))

    predictions = [
        {"token": tokenizer.decode([idx]), "probability": round(prob * 100, 2)}
        for prob, idx in zip(top_probs.tolist(), top_indices.tolist())
    ]
    return {"predictions": predictions}

if __name__ == "__main__":
    # Listen on all interfaces; the port is taken from $PORT when set and
    # falls back to 7860 otherwise.
    listen_port = int(os.getenv("PORT", "7860"))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)