"""FastAPI backend serving fine-tuned Turkish reasoning models via Hugging Face pipelines."""

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import (
    AutoModelForCausalLM,
    AutoModelForImageTextToText,
    AutoProcessor,
    AutoTokenizer,
    pipeline,
)
from peft import PeftModel
import gc
import torch
import os
from typing import Optional
from dotenv import load_dotenv

load_dotenv()

app = FastAPI()

# Allow the local development frontends to call the API from the browser.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:5173", "http://localhost:3000"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
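
# Only one model is held in memory at a time (so it fits on a single GPU);
# these globals act as a simple cache keyed by the model path.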
current_model = None
current_pipe = None
current_model_name = None


class GenerateRequest(BaseModel):
    model_name: str
    prompt: str
    system_prompt: str
    max_tokens: int = 512
    temperature: float = 0.75
    top_p: float = 0.95
    top_k: int = 64
    image: Optional[str] = None  # used only by models with image support
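
# Example request body for POST /generate (illustrative values):
# {
#     "model_name": "Chan-Y/TurkishReasoner-Gemma3-1B",
#     "prompt": "Merhaba!",
#     "system_prompt": "You are a helpful assistant.",
#     "max_tokens": 256
# }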


class GenerateResponse(BaseModel):
    generated_text: str
    model_used: str


def load_model(model_path: str):
    """Load the requested model, unloading whichever model is currently resident."""
    global current_model, current_pipe, current_model_name

    # Reuse the cached pipeline if this model is already loaded.
    if current_pipe is not None and current_model_name == model_path:
        return current_pipe

    print(f"Unloading previous model to load: {model_path}")

    # Drop references rather than `del`-ing the globals so they stay defined
    # even if the load below fails, then reclaim host and GPU memory.
    current_model = None
    current_pipe = None
    current_model_name = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    try:
        if "Gemma3-1B" in model_path:
            print("Loading Gemma 3 1B with PEFT...")
            base_model_name = "unsloth/gemma-3-1b-it"
            tokenizer = AutoTokenizer.from_pretrained(base_model_name)
            base_model = AutoModelForCausalLM.from_pretrained(
                base_model_name,
                device_map="auto",
                torch_dtype="auto",
            )
            # The checkpoint at model_path is a PEFT (LoRA) adapter applied on
            # top of the base model.
            model = PeftModel.from_pretrained(base_model, model_path)
            pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
            current_model = model

        elif "Gemma3-12B" in model_path:
            print("Loading Gemma 3 12B with PEFT (image support)...")
            base_model_name = "unsloth/gemma-3-12b-it"
            # Gemma 3 12B is multimodal, so load it through the
            # image-text-to-text classes; a plain text-generation pipeline
            # would not accept the image inputs sent by /generate.
            processor = AutoProcessor.from_pretrained(base_model_name)
            base_model = AutoModelForImageTextToText.from_pretrained(
                base_model_name,
                device_map="auto",
                torch_dtype="auto",
            )
            model = PeftModel.from_pretrained(base_model, model_path)
            pipe = pipeline("image-text-to-text", model=model, processor=processor)
            current_model = model

        elif "Qwen2.5-3B" in model_path:
            print("Loading Qwen 2.5 3B...")
            pipe = pipeline("text-generation", model=model_path, device=0)
            current_model = pipe.model

        elif "Llama3.1-8B" in model_path:
            print("Loading Llama 3.1 8B...")
            pipe = pipeline("text-generation", model=model_path, device=0)
            current_model = pipe.model

        else:
            print(f"Unknown model pattern for {model_path}, trying default pipeline loading...")
            pipe = pipeline("text-generation", model=model_path, device=0)
            current_model = pipe.model

        current_pipe = pipe
        current_model_name = model_path
        return pipe

    except Exception as e:
        print(f"Error loading model {model_path}: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Model loading failed: {str(e)}")


# Warm-start with the default model; failures are tolerated so the API can
# still come up on machines without a GPU.
default_model = "Chan-Y/TurkishReasoner-Gemma3-1B"
try:
    load_model(default_model)
except Exception as e:
    print(f"Initial model loading failed (may be expected in a dev environment): {e}")


@app.get("/")
def read_root():
    return {"message": "Turkish AI Backend API is running"}


@app.get("/models")
def get_models():
    """Return the list of available models."""
    return {
        "models": [
            {
                "name": "Gemma 3 1B Turkish Reasoning",
                "path": "Chan-Y/TurkishReasoner-Gemma3-1B",
                "supportsImages": False,
            },
            {
                "name": "Gemma 3 12B Turkish (Supports Images)",
                "path": "Chan-Y/TurkishReasoner-Gemma3-12B",
                "supportsImages": True,
            },
            {
                "name": "Qwen 2.5 3B Turkish Reasoning",
                "path": "Chan-Y/TurkishReasoner-Qwen2.5-3B",
                "supportsImages": False,
            },
            {
                "name": "Llama 3.1 8B Turkish Reasoning",
                "path": "Chan-Y/TurkishReasoner-Llama3.1-8B",
                "supportsImages": False,
            },
        ]
    }


@app.post("/generate", response_model=GenerateResponse)
async def generate_text(request: GenerateRequest):
    """Generate text using the requested model."""
    try:
        pipe = load_model(request.model_name)

        user_content = [{"type": "text", "text": request.prompt}]

        # Only the Gemma 3 12B model accepts images; put the image before the
        # text prompt in the user turn.
        if request.image and "Gemma3-12B" in request.model_name:
            user_content.insert(0, {"type": "image", "image": request.image})

        messages = [
            {
                "role": "system",
                "content": [{"type": "text", "text": request.system_prompt}],
            },
            {
                "role": "user",
                "content": user_content,
            },
        ]

        print(f"Generating with {request.model_name}, temp={request.temperature}")

        output = pipe(
            messages,
            max_new_tokens=request.max_tokens,
            temperature=request.temperature,
            top_p=request.top_p,
            top_k=request.top_k,
        )

        # With chat-style input the pipeline returns the full conversation;
        # the assistant's reply is the last message.
        generated_text = output[0]["generated_text"][-1]["content"]

        return GenerateResponse(
            generated_text=generated_text,
            model_used=request.model_name,
        )

    except Exception as e:
        print(f"Error during generation: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
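
# Example call (illustrative; adjust host/port to your deployment):
#   curl -X POST http://localhost:7860/generate \
#     -H "Content-Type: application/json" \
#     -d '{"model_name": "Chan-Y/TurkishReasoner-Gemma3-1B", "prompt": "Merhaba!", "system_prompt": "You are a helpful assistant."}'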


@app.post("/generate/stream")
async def generate_text_stream(request: GenerateRequest):
    """
    Streaming endpoint for real-time generation.
    (Not implemented in this version; it would use Server-Sent Events.)
    """
    raise HTTPException(status_code=501, detail="Streaming not yet implemented")
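
# One possible implementation sketch (an assumption, not wired up here): run
# generation in a background thread with a transformers.TextIteratorStreamer,
# then yield the streamed text chunks from a fastapi.responses.StreamingResponse
# with media_type="text/event-stream".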


# Serve a built frontend (if present) alongside the API.
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse

static_dir = "static"
if os.path.exists(static_dir):
    app.mount("/assets", StaticFiles(directory=f"{static_dir}/assets"), name="assets")

    # Catch-all route: anything that is not an API path is handled by the
    # single-page app's index.html.
    @app.get("/{full_path:path}")
    async def serve_spa(full_path: str):
        if full_path.startswith(("api", "generate", "models")):
            raise HTTPException(status_code=404, detail="Not found")
        return FileResponse(f"{static_dir}/index.html")
else:
    print("Static directory not found. Running in API-only mode.")


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
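
# For development with auto-reload (assuming this file is saved as main.py):
#   uvicorn main:app --reload --port 7860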