# Hugging Face Space: Multimodal RAG API
# (Space status-widget text from the scraped page removed — it is not Python.)
# ------------------ Imports & configuration ------------------
import io
import os

# ✅ Always use a writable cache dir in Hugging Face Spaces.
# Set BEFORE importing transformers so the cache location is actually used
# (in the original file this was set after the transformers import).
os.environ["HF_HOME"] = "/tmp/huggingface_cache"

# 🧩 LangSmith Integration (Optional)
# SECURITY: the original source hard-coded a live LangSmith API key
# ("lsv2_pt_..."). That key is leaked and must be rotated. Never commit
# secrets — supply LANGCHAIN_API_KEY via the Space's secret settings instead.
os.environ["LANGCHAIN_PROJECT"] = "Multimodal_RAG_App"
# Enable tracing only when a key is actually available in the environment.
os.environ["LANGCHAIN_TRACING_V2"] = (
    "true" if os.environ.get("LANGCHAIN_API_KEY") else "false"
)

from fastapi import FastAPI, Form, File, UploadFile
from transformers import pipeline
from PIL import Image
from langsmith import traceable

app = FastAPI(title="🚀 Multimodal RAG API (CPU-friendly)")
# ------------------ Load Pipelines ------------------
# 🧠 Lightweight Qwen model (0.6B params - perfect for CPU!)
# Built once at import time; the first run downloads weights into HF_HOME.
text_pipeline = pipeline(
    "text-generation",
    model="Qwen/Qwen2.5-0.5B-Instruct",  # Using the instruct version for better responses
    device=-1,  # -1 means CPU in pipeline API
    trust_remote_code=True
)
# 👁️ Image-to-text (multimodal captioning)
# BLIP base captioner, also constructed eagerly at import time.
image_pipeline = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-base",
    device=-1  # Force CPU
)
# NOTE(review): the scraped source lost the FastAPI route decorators; without
# one, this handler is never registered. Root path inferred — confirm against
# the original deployment.
@app.get("/")
def home():
    """Health-check endpoint confirming the API is up."""
    return {"message": "✅ Multimodal RAG API is running successfully!"}
# ------------------ 1️⃣ Summarize Text ------------------
# NOTE(review): route decorator was missing in the scraped source; the
# `Form(...)` parameter shows this is a POST endpoint. Path inferred — confirm.
@app.post("/summarize_text")
def summarize_text(prompt: str = Form(...)):
    """Generate a completion for *prompt* with the Qwen text pipeline.

    Returns ``{"response": <generated text>}`` on success, or an error dict
    (``error`` / ``details`` keys) on failure.
    """
    try:
        result = text_pipeline(
            prompt,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            # Bug fix: without this, the pipeline echoes the full prompt in
            # front of the generated answer.
            return_full_text=False,
        )
        return {"response": result[0]["generated_text"]}
    except Exception as e:  # API boundary: report errors as JSON, don't crash
        return {"error": str(e), "details": "Text summarization failed"}
# ------------------ 2️⃣ Image + Text Summarizer ------------------
# NOTE(review): route decorator was missing in the scraped source — path
# inferred from the function name; confirm against the original deployment.
@app.post("/summarize_smol")
async def summarize_smol(text: str = Form(...), image: UploadFile = File(None)):
    """Echo *text*, optionally appending a BLIP caption of the uploaded image.

    Returns ``{"response": <text [+ image summary]>}`` on success, or an
    error dict on failure.
    """
    try:
        # Explicit None check: the parameter is None when no file is uploaded.
        if image is not None:
            image_bytes = await image.read()
            pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
            img_result = image_pipeline(pil_image)
            response = f"{text}\n\n🖼️ Image Summary: {img_result[0]['generated_text']}"
        else:
            response = text
        return {"response": response}
    except Exception as e:  # API boundary: report errors as JSON, don't crash
        return {"error": str(e), "details": "Image processing failed"}
# ------------------ 3️⃣ Final Answer (Text + Multiple Images) ------------------
# NOTE(review): route decorator was missing in the scraped source — path
# inferred from the function name; confirm against the original deployment.
@app.post("/final_answer")
async def final_answer(
    context: str = Form(...),
    question: str = Form(...),
    images: list[UploadFile] = File(None)
):
    """Answer *question* from *context*, enriched with captions of any images.

    Each uploaded image is captioned with the BLIP pipeline and appended to
    the context before prompting the Qwen model. Returns
    ``{"response": <answer>}`` on success, or an error dict on failure.
    """
    try:
        combined_context = context
        # Process images if provided (parameter is None when none are sent).
        if images:
            for img_file in images:
                img_bytes = await img_file.read()
                pil_image = Image.open(io.BytesIO(img_bytes)).convert("RGB")
                img_result = image_pipeline(pil_image)
                combined_context += f"\n\n🖼️ Image Info: {img_result[0]['generated_text']}"
        # Create prompt for Qwen
        final_prompt = f"Context: {combined_context}\n\nQuestion: {question}\n\nAnswer:"
        result = text_pipeline(
            final_prompt,
            max_new_tokens=300,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            # Bug fix: without this, the pipeline echoes the full prompt in
            # front of the generated answer.
            return_full_text=False,
        )
        return {"response": result[0]["generated_text"]}
    except Exception as e:  # API boundary: report errors as JSON, don't crash
        return {"error": str(e), "details": "Final answer generation failed"}