"""Multimodal RAG FastAPI service.

CPU-friendly endpoints combining Qwen text generation with BLIP image
captioning, optionally traced via LangSmith.
"""

import io
import os

# ✅ Always use a writable cache dir in Hugging Face Spaces.
# BUGFIX: this must be set BEFORE importing transformers/huggingface_hub —
# the cache path is resolved at import time, so setting it after the import
# (as the original code did) had no effect.
os.environ["HF_HOME"] = "/tmp/huggingface_cache"

# 🧩 LangSmith Integration (Optional)
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "Multimodal_RAG_App"
# SECURITY: hardcoded API key committed to source — this key is now leaked and
# should be rotated; load it from the environment or a secrets manager instead.
os.environ["LANGCHAIN_API_KEY"] = "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"

from fastapi import FastAPI, File, Form, UploadFile
from langsmith import traceable
from PIL import Image
from transformers import pipeline

app = FastAPI(title="🚀 Multimodal RAG API (CPU-friendly)")

# ------------------ Load Pipelines ------------------

# 🧠 Lightweight Qwen model (0.5B params - perfect for CPU!)
text_pipeline = pipeline(
    "text-generation",
    model="Qwen/Qwen2.5-0.5B-Instruct",  # instruct version for better responses
    device=-1,  # -1 means CPU in the pipeline API
    trust_remote_code=True,
)

# 👁️ Image-to-text (multimodal captioning)
image_pipeline = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-base",
    device=-1,  # Force CPU
)


@app.get("/")
def home():
    """Health-check endpoint."""
    return {"message": "✅ Multimodal RAG API is running successfully!"}


# ------------------ 1️⃣ Summarize Text ------------------
# BUGFIX (all three endpoints): the route decorator must be applied LAST
# (i.e. written on top). Decorators apply bottom-up, so with @traceable above
# @app.post the route registered the *untraced* function and LangSmith never
# saw any request. With this order, traceable wraps the function first and
# the traced wrapper is what gets registered.
@app.post("/summarize_qwen")
@traceable(name="Qwen_Text_Summarization")
def summarize_text(prompt: str = Form(...)):
    """Generate a Qwen completion for *prompt*.

    Returns {"response": <generated text>} on success, or an
    {"error", "details"} dict on failure (boundary handler — the API
    reports errors rather than crashing).
    """
    try:
        result = text_pipeline(
            prompt,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
        return {"response": result[0]["generated_text"]}
    except Exception as e:
        return {"error": str(e), "details": "Text summarization failed"}


# ------------------ 2️⃣ Image + Text Summarizer ------------------
@app.post("/summarize_smol")
@traceable(name="BLIP_Image_Text_Summarization")
async def summarize_smol(text: str = Form(...), image: UploadFile = File(None)):
    """Echo *text*, appending a BLIP caption when an image is uploaded."""
    try:
        if image:
            image_bytes = await image.read()
            pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
            img_result = image_pipeline(pil_image)
            response = f"{text}\n\n🖼️ Image Summary: {img_result[0]['generated_text']}"
        else:
            response = text
        return {"response": response}
    except Exception as e:
        return {"error": str(e), "details": "Image processing failed"}


# ------------------ 3️⃣ Final Answer (Text + Multiple Images) ------------------
@app.post("/final_answer")
@traceable(name="Final_Multimodal_Answer")
async def final_answer(
    context: str = Form(...),
    question: str = Form(...),
    images: list[UploadFile] = File(None),
):
    """Answer *question* from *context*, enriched with captions of any uploaded images."""
    try:
        combined_context = context

        # Append a BLIP caption per uploaded image so Qwen can reason over them.
        if images:
            for img_file in images:
                img_bytes = await img_file.read()
                pil_image = Image.open(io.BytesIO(img_bytes)).convert("RGB")
                img_result = image_pipeline(pil_image)
                combined_context += f"\n\n🖼️ Image Info: {img_result[0]['generated_text']}"

        # Create prompt for Qwen
        final_prompt = f"Context: {combined_context}\n\nQuestion: {question}\n\nAnswer:"
        result = text_pipeline(
            final_prompt,
            max_new_tokens=300,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
        return {"response": result[0]["generated_text"]}
    except Exception as e:
        return {"error": str(e), "details": "Final answer generation failed"}