# Multi_modal / app.py — Hugging Face Space source
# Uploaded by Sameer-Handsome173 ("Update app.py", commit e1dae85, verified)
import os

# ✅ Writable cache dir for Hugging Face Spaces.
# NOTE: this must be set BEFORE `transformers` is imported — the library
# resolves its cache paths at import time, so setting HF_HOME afterwards
# (as the original code did) has no effect.
os.environ.setdefault("HF_HOME", "/tmp/huggingface_cache")

import io

from fastapi import FastAPI, File, Form, UploadFile
from PIL import Image
from transformers import pipeline

# 🧩 LangSmith Integration (Optional)
# SECURITY: the API key must come from the environment / Space secrets —
# never hard-code credentials in source control. Tracing is only enabled
# when a key is actually configured.
if os.environ.get("LANGCHAIN_API_KEY"):
    os.environ.setdefault("LANGCHAIN_TRACING_V2", "true")
    os.environ.setdefault("LANGCHAIN_PROJECT", "Multimodal_RAG_App")
from langsmith import traceable

app = FastAPI(title="🚀 Multimodal RAG API (CPU-friendly)")
# ------------------ Load Pipelines ------------------
# Both pipelines are built once at import time and pinned to CPU
# (device=-1 in the pipeline API).

# 🧠 Text generation: lightweight Qwen instruct model (~0.5B params),
# small enough to run comfortably without a GPU.
text_pipeline = pipeline(
    task="text-generation",
    model="Qwen/Qwen2.5-0.5B-Instruct",  # instruct variant for better responses
    device=-1,
    trust_remote_code=True,
)

# 👁️ Image captioning (image-to-text) with BLIP.
image_pipeline = pipeline(
    task="image-to-text",
    model="Salesforce/blip-image-captioning-base",
    device=-1,
)
@app.get("/")
def home():
    """Health-check endpoint confirming the API is up."""
    status_message = "✅ Multimodal RAG API is running successfully!"
    return {"message": status_message}
# ------------------ 1️⃣ Summarize Text ------------------
# ------------------ 1️⃣ Summarize Text ------------------
# BUGFIX: `@app.post` must be the OUTERMOST decorator. Decorators apply
# bottom-up, so with `@traceable` on top the route was registered with the
# unwrapped function and LangSmith tracing never ran on requests.
@app.post("/summarize_qwen")
@traceable(name="Qwen_Text_Summarization")
def summarize_text(prompt: str = Form(...)):
    """Generate a response for *prompt* with the Qwen text pipeline.

    Returns a JSON object with a "response" key on success, or an
    "error"/"details" pair when generation fails.
    """
    try:
        result = text_pipeline(
            prompt,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            # Return only the newly generated text — without this the
            # pipeline echoes the prompt back in front of the completion.
            return_full_text=False,
        )
        return {"response": result[0]["generated_text"]}
    except Exception as e:  # boundary handler: surface failure as JSON
        return {"error": str(e), "details": "Text summarization failed"}
# ------------------ 2️⃣ Image + Text Summarizer ------------------
# BUGFIX: `@app.post` must be the OUTERMOST decorator so the route is
# registered with the traceable-wrapped handler (decorators apply bottom-up).
@app.post("/summarize_smol")
@traceable(name="BLIP_Image_Text_Summarization")
async def summarize_smol(text: str = Form(...), image: UploadFile = File(None)):
    """Caption an optional uploaded image and append it to *text*.

    When *image* is provided, its BLIP caption is appended to the text;
    otherwise the text is returned unchanged. Returns a JSON object with a
    "response" key, or an "error"/"details" pair on failure.
    """
    try:
        if image:
            image_bytes = await image.read()
            # Force RGB — BLIP expects 3-channel input (uploads may be RGBA/L).
            pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
            img_result = image_pipeline(pil_image)
            response = f"{text}\n\n🖼️ Image Summary: {img_result[0]['generated_text']}"
        else:
            response = text
        return {"response": response}
    except Exception as e:  # boundary handler: surface failure as JSON
        return {"error": str(e), "details": "Image processing failed"}
# ------------------ 3️⃣ Final Answer (Text + Multiple Images) ------------------
# BUGFIX: `@app.post` must be the OUTERMOST decorator so the route is
# registered with the traceable-wrapped handler (decorators apply bottom-up).
@app.post("/final_answer")
@traceable(name="Final_Multimodal_Answer")
async def final_answer(
    context: str = Form(...),
    question: str = Form(...),
    images: list[UploadFile] = File(None)
):
    """Answer *question* from *context* plus captions of any uploaded images.

    Each image is captioned with BLIP and the captions are appended to the
    context before prompting the Qwen pipeline. Returns a JSON object with
    a "response" key, or an "error"/"details" pair on failure.
    """
    try:
        combined_context = context
        # Enrich the context with a caption per uploaded image, if any.
        if images:
            for img_file in images:
                img_bytes = await img_file.read()
                # Force RGB — BLIP expects 3-channel input.
                pil_image = Image.open(io.BytesIO(img_bytes)).convert("RGB")
                img_result = image_pipeline(pil_image)
                combined_context += f"\n\n🖼️ Image Info: {img_result[0]['generated_text']}"
        # Simple QA prompt for the Qwen instruct model.
        final_prompt = f"Context: {combined_context}\n\nQuestion: {question}\n\nAnswer:"
        result = text_pipeline(
            final_prompt,
            max_new_tokens=300,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            # Return only the answer — without this the pipeline echoes the
            # full prompt (context + question) back in front of it.
            return_full_text=False,
        )
        return {"response": result[0]["generated_text"]}
    except Exception as e:  # boundary handler: surface failure as JSON
        return {"error": str(e), "details": "Final answer generation failed"}