import os

import torch
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from transformers import AutoTokenizer, AutoModelForCausalLM

# -------------------------------
# CONFIGURATION
# -------------------------------
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
HF_TOKEN = os.getenv("HF_TOKEN")  # Uses your Hugging Face Space secret

# -------------------------------
# LOAD MODEL & TOKENIZER
# -------------------------------
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    token=HF_TOKEN  # `use_auth_token` is deprecated in recent transformers; use `token`
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if torch.cuda.is_available() else "auto",
    device_map="auto",
    token=HF_TOKEN
)

# -------------------------------
# FASTAPI APP SETUP
# -------------------------------
app = FastAPI()

# Enable CORS so the frontend can talk to the backend
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow all origins; tighten this for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# -------------------------------
# GENERATE RECIPE ENDPOINT
# -------------------------------
@app.post("/generate")
async def generate(request: Request):
    try:
        # Parse JSON input from the frontend
        data = await request.json()
        ingredients = data.get("ingredients", "")
        mood = data.get("mood", "any")
        time = data.get("time", "30")
        restrictions = data.get("restrictions", "")

        # Build the prompt
        prompt = f"""
You are a helpful cooking assistant.
Generate ONE {restrictions} {mood} recipe using the following ingredients: {ingredients}.
The recipe should be easy to follow and ready in under {time} minutes.

Include:
- A fun and catchy title
- Ingredient list (with substitutions in brackets)
- Clear step-by-step instructions

Keep it short, clear, and conversational.
"""

        # Format the prompt with the model's chat template and tokenize it
        messages = [{"role": "user", "content": prompt}]
        text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

        # Generate the recipe text
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=1024,  # Cap on generated tokens; adjust for latency vs. length
            do_sample=True,
            temperature=0.9,
            top_p=0.95
        )

        # Decode only the newly generated tokens, skipping the echoed prompt
        output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
        content = tokenizer.decode(output_ids, skip_special_tokens=True)

        return JSONResponse(content={"recipe": content})

    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)

# -------------------------------
# HEALTH CHECK ENDPOINT
# -------------------------------
@app.get("/")
async def root():
    return {"message": "SnackHack backend is running! 🎉"}
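
# -------------------------------
# RUNNING LOCALLY (usage sketch)
# -------------------------------
# A minimal sketch of how this backend might be started and exercised.
# The filename `app.py` and port 7860 (the Hugging Face Spaces default)
# are assumptions, not part of the original code:
#
#   uvicorn app:app --host 0.0.0.0 --port 7860
#
# Example request against the /generate endpoint, matching the JSON keys
# the handler reads above:
#
#   curl -X POST http://localhost:7860/generate \
#        -H "Content-Type: application/json" \
#        -d '{"ingredients": "eggs, spinach, feta", "mood": "cozy", "time": "20", "restrictions": "vegetarian"}'
#
# Expected response shape: {"recipe": "..."} on success, or
# {"error": "..."} with HTTP 500 on failure.

# Optional entry point so the file can also be run with `python app.py`
# (a convenience sketch; Spaces setups often invoke uvicorn directly):
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)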