Upload 2 files
- app.py +122 -0
- requirements.txt +7 -0
app.py
ADDED
@@ -0,0 +1,122 @@
# ================================
# app.py - Multimodal RAG Chatbot (Hugging Face Spaces Compatible)
# ================================

import os

import streamlit as st
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline, CLIPProcessor, CLIPModel
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
from PIL import Image

# ======================================
# 1. Setup Pinecone Connection
# ======================================
# Read the key from a Spaces secret / environment variable rather than
# hardcoding it -- a key committed to app.py is publicly exposed.
pinecone_api_key = os.environ["PINECONE_API_KEY"]
pc = Pinecone(api_key=pinecone_api_key)

text_index = pc.Index("rag-text-index")
image_index = pc.Index("rag-image-index")

# ======================================
# 2. Setup Local LLM (Flan-T5-Large)
# ======================================
model_name = "google/flan-t5-large"
device = "cuda" if torch.cuda.is_available() else "cpu"

@st.cache_resource
def load_rag_pipeline():
    # Cache the pipeline so Streamlit does not reload Flan-T5 on every rerun.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
    return pipeline("text2text-generation", model=model, tokenizer=tokenizer,
                    device=0 if torch.cuda.is_available() else -1)

rag_pipeline = load_rag_pipeline()

# ======================================
# 3. Helper Functions
# ======================================
@st.cache_resource
def load_text_encoder():
    # Cache the sentence encoder instead of re-instantiating it on every query.
    return SentenceTransformer('sentence-transformers/all-mpnet-base-v2', device=device)

@st.cache_resource
def load_clip():
    # Cache the CLIP model and processor as well.
    clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
    clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
    return clip_model, clip_processor

def search_text_index(query_text, top_k=5):
    text_encoder = load_text_encoder()
    query_embedding = text_encoder.encode(query_text).tolist()
    result = text_index.query(vector=query_embedding, top_k=top_k, include_metadata=True)
    return result['matches']

def search_image_index(uploaded_image, top_k=3):
    clip_model, clip_processor = load_clip()
    inputs = clip_processor(images=uploaded_image, return_tensors="pt").to(device)
    with torch.no_grad():
        query_embedding = clip_model.get_image_features(**inputs)
    query_embedding = query_embedding[0].cpu().numpy().tolist()
    result = image_index.query(vector=query_embedding, top_k=top_k, include_metadata=True)
    return result['matches']

def prepare_context_from_matches(text_matches, image_matches):
    context = ""
    if text_matches:
        context += "TEXTUAL INFORMATION:\n"
        for match in text_matches:
            content = match['metadata'].get('content', '')
            page = match['metadata'].get('page', 'N/A')
            context += f"[Page {page}] {content}\n"
    if image_matches:
        context += "IMAGE INFORMATION:\n"
        for match in image_matches:
            page = match['metadata'].get('page', 'N/A')
            context += f"[Image extracted from Page {page}]\n"
    return context.strip()

def generate_final_answer(context, question):
    if len(context.split()) < 20:
        return "Not enough detailed information retrieved to answer properly."
    prompt = f"""
You are a financial expert assistant. Answer ONLY based on the context provided below.
Expand financial abbreviations (e.g., EPS → Earnings Per Share) and explain in full sentences.
Provide at least 3 complete sentences.

CONTEXT:
{context}

QUESTION:
{question}

FINAL ANSWER:
"""
    # max_new_tokens keeps Flan-T5's short default generation length from truncating the answer
    output = rag_pipeline(prompt, max_new_tokens=256)[0]['generated_text']
    return output.strip()

# ======================================
# 4. Streamlit Web App
# ======================================
st.set_page_config(page_title="Multimodal RAG Assistant", page_icon="🤖", layout="centered")
st.title("📈 Multimodal RAG Assistant")

st.write("Ask a question based on uploaded PDFs, or upload a relevant image:")

# Input options
user_query = st.text_input("Enter your question:")
uploaded_image = st.file_uploader("Or upload an image:", type=["png", "jpg", "jpeg"])

if st.button("Submit"):
    if user_query:
        text_matches = search_text_index(user_query)
        image_matches = []
    elif uploaded_image:
        img = Image.open(uploaded_image)
        text_matches = []
        image_matches = search_image_index(img)
    else:
        st.warning("Please either enter a question or upload an image.")
        st.stop()

    # Build context
    context = prepare_context_from_matches(text_matches, image_matches)

    # Generate final answer
    answer = generate_final_answer(context, user_query if user_query else "Describe this image.")

    # Show result
    st.success("✅ Answer:")
    st.write(answer)

    # Show matched chunks
    with st.expander("🔍 View Retrieved Context"):
        st.text(context)

st.sidebar.info("Built with Pinecone + FLAN-T5-Large + Streamlit 🚀")
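Note: app.py queries two pre-populated Pinecone indexes ("rag-text-index" with content/page metadata, "rag-image-index" with page metadata), but this commit contains no ingestion step. A minimal sketch of what the text-side ingestion might look like, assuming only the index name and metadata keys taken from app.py; the script name, chunk list, IDs, cloud, and region are illustrative placeholders:

# ingest.py - hypothetical one-off ingestion sketch, not part of this commit
import os
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer

pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# all-mpnet-base-v2 produces 768-dim vectors; cloud/region here are placeholders.
if "rag-text-index" not in pc.list_indexes().names():
    pc.create_index(name="rag-text-index", dimension=768, metric="cosine",
                    spec=ServerlessSpec(cloud="aws", region="us-east-1"))

encoder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
text_index = pc.Index("rag-text-index")

# `chunks` stands in for the output of whatever PDF extraction produced the text.
chunks = [{"id": "p1-c0", "page": 1, "content": "Example chunk of PDF text."}]
text_index.upsert(vectors=[
    {"id": c["id"],
     "values": encoder.encode(c["content"]).tolist(),
     "metadata": {"content": c["content"], "page": c["page"]}}
    for c in chunks
])

The image index would be filled the same way, but with 512-dim CLIP ViT-B/32 embeddings from get_image_features, matching the vectors that search_image_index queries with.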
requirements.txt
ADDED
@@ -0,0 +1,7 @@
streamlit>=1.24.0
torch>=2.0.0
transformers>=4.30.0
sentence-transformers>=2.2.2
pinecone>=3.0.0
Pillow>=9.5.0
requests>=2.31.0
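Usage note: since app.py (as revised above) reads PINECONE_API_KEY from the environment, the key should be added as a secret in the Space settings on Hugging Face; locally, export the variable before launching with `streamlit run app.py`. Model weights for Flan-T5 and CLIP are pulled from the Hugging Face Hub on first start.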