File size: 3,781 Bytes
1a587d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73b5638
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import io
import os
import uvicorn
import fitz  # PyMuPDF
import pytesseract
from PIL import Image
from fastapi import FastAPI, Request, UploadFile, File, HTTPException
from fastapi.templating import Jinja2Templates
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
from contextlib import asynccontextmanager

# --- Global AI Handler ---
# Module-level handle to the loaded model; None means OCR-only mode.
llm = None

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Prepare the global Llama engine for the app's lifetime.

    Downloads a small GGUF model from the Hugging Face Hub and loads it
    into the module-level ``llm``. Any startup failure is logged and
    ``llm`` stays ``None`` so the app still serves OCR-only requests.
    """
    global llm
    print("🚀 Cloud Startup: Preparing AI Engine...")
    try:
        # Fetch the 135M-parameter model file — small enough for a free
        # shared-CPU tier.
        gguf_path = hf_hub_download(
            repo_id="bartowski/SmolLM2-135M-Instruct-GGUF",
            filename="SmolLM2-135M-Instruct-Q8_0.gguf",
        )
        # Load the weights with settings tuned for shared cloud CPUs.
        llm = Llama(
            model_path=gguf_path,
            n_ctx=2048,
            n_threads=2,
            n_batch=512,
        )
        print("✅ AI Engine loaded successfully!")
    except Exception as e:
        # Deliberate best-effort: keep the app up without AI summaries.
        print(f"⚠️ AI Startup Failed: {e}. The app will run in OCR-only mode.")
        llm = None
    yield

# FastAPI application wired to the lifespan handler so the model is
# loaded once at startup.
app = FastAPI(lifespan=lifespan)
# Jinja2 renderer; expects index.html inside the ./templates folder.
templates = Jinja2Templates(directory="templates")

# --- Deep Parsing Logic (OCR) ---
def deep_document_parse(file_bytes, extension):
    """Extract text from a PDF or image, falling back to OCR when needed.

    Args:
        file_bytes: Raw bytes of the uploaded file.
        extension: Lower-cased extension including the dot (e.g. ".pdf").

    Returns:
        The extracted text (pages joined by blank lines), an
        "Error parsing file: ..." string on failure, or an empty string
        for unsupported extensions.
    """
    text_content = []
    try:
        if extension == ".pdf":
            # Use a context manager so the PyMuPDF document is closed
            # deterministically (the original leaked the handle per request).
            with fitz.open(stream=file_bytes, filetype="pdf") as doc:
                for page_num, page in enumerate(doc):
                    page_text = page.get_text().strip()
                    # Pages with almost no embedded text are likely scanned
                    # images — rasterize at 300 DPI and OCR them instead.
                    if len(page_text) < 50:
                        pix = page.get_pixmap(dpi=300)
                        img = Image.open(io.BytesIO(pix.tobytes("png")))
                        page_text = pytesseract.image_to_string(img)
                    text_content.append(f"[PAGE {page_num + 1}]\n{page_text}")
        elif extension in (".png", ".jpg", ".jpeg"):
            img = Image.open(io.BytesIO(file_bytes))
            text_content.append(pytesseract.image_to_string(img))
    except Exception as e:
        # Parsing is best-effort: report the error as the extracted text.
        return f"Error parsing file: {str(e)}"

    return "\n\n".join(text_content)

# --- Endpoints ---
@app.get("/")
async def home(request: Request):
    """Serve the landing page rendered from templates/index.html."""
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)

@app.post("/process-file")
async def process_file(file: UploadFile = File(...)):
    """Accept an uploaded file and return its extracted text.

    Uses ``os.path.splitext`` so a dot-less filename yields an empty
    extension instead of "." + the whole name, and a missing filename no
    longer raises AttributeError (which surfaced as an HTTP 500).
    """
    file_bytes = await file.read()
    # splitext returns ("name", ".ext"), or ("name", "") when no dot exists.
    extension = os.path.splitext(file.filename or "")[1].lower()
    return {"extracted_text": deep_document_parse(file_bytes, extension)}

@app.post("/generate")
async def generate(request: Request):
    """Run the loaded LLM on the posted topic.

    Expects a JSON body with "topic", optional "mode" ("summarize" or
    "explain"), and optional "points" (bullet count for summaries).
    Returns {"result": <generated text or error message>}.
    """
    if llm is None:
        return {"result": "⚠️ AI Engine is offline. Please check logs for download errors."}

    payload = await request.json()
    topic = payload.get("topic", "")
    mode = payload.get("mode", "summarize")
    points = payload.get("points", 3)

    # Pick the system instruction based on the requested mode.
    if mode == "explain":
        instruction = "Explain this"
    else:
        instruction = f"Summarize into {points} points"

    # ChatML-style prompt format expected by SmolLM2-Instruct.
    prompt = (
        "<|im_start|>system\n"
        f"You are an expert assistant. {instruction}.<|im_end|>\n"
        f"<|im_start|>user\n{topic}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

    try:
        completion = llm(prompt, max_tokens=500, stop=["<|im_end|>"])
        return {"result": completion["choices"][0]["text"]}
    except Exception as e:
        # Best-effort: surface the failure in the response body.
        return {"result": f"AI Generation Error: {str(e)}"}

if __name__ == "__main__":
    # Hugging Face Spaces requires binding to 0.0.0.0; port 7860 is the
    # platform default, overridable via the PORT environment variable.
    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)