ibo1234 committed
Commit 7720b87 · verified · 1 Parent(s): 415b270

Upload 2 files

Files changed (2):
  1. app.py +142 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,142 @@
+ # ================================
+ # app.py - Multimodal RAG Chatbot (Hugging Face Spaces Compatible)
+ # ================================
+
+ import os
+
+ import streamlit as st
+ import torch
+ from PIL import Image
+ from pinecone import Pinecone
+ from sentence_transformers import SentenceTransformer
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, CLIPModel, CLIPProcessor, pipeline
+
+ # ======================================
+ # 1. Setup Pinecone Connection
+ # ======================================
+ # Read the API key from the environment (e.g. a Spaces secret) rather than hardcoding it in source.
+ pinecone_api_key = os.environ["PINECONE_API_KEY"]
+ pc = Pinecone(api_key=pinecone_api_key)
+
+ text_index = pc.Index("rag-text-index")
+ image_index = pc.Index("rag-image-index")
+
+ # ======================================
+ # 2. Setup Local LLM (Flan-T5-Large)
+ # ======================================
+ model_name = "google/flan-t5-large"
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ @st.cache_resource
+ def load_generator():
+     # Load the LLM once and reuse it across Streamlit reruns.
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
+     return pipeline("text2text-generation", model=model, tokenizer=tokenizer,
+                     device=0 if torch.cuda.is_available() else -1)
+
+ rag_pipeline = load_generator()
+
+ # ======================================
+ # 3. Helper Functions
+ # ======================================
+ @st.cache_resource
+ def load_text_encoder():
+     # Cache the sentence encoder so it is not reloaded on every query.
+     return SentenceTransformer('sentence-transformers/all-mpnet-base-v2', device=device)
+
+ @st.cache_resource
+ def load_clip():
+     # Cache the CLIP model and processor used for image queries.
+     clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
+     clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+     return clip_model, clip_processor
+
+ def search_text_index(query_text, top_k=5):
+     query_embedding = load_text_encoder().encode(query_text).tolist()
+     result = text_index.query(vector=query_embedding, top_k=top_k, include_metadata=True)
+     return result['matches']
+
+ def search_image_index(uploaded_image, top_k=3):
+     clip_model, clip_processor = load_clip()
+     inputs = clip_processor(images=uploaded_image, return_tensors="pt").to(device)
+     with torch.no_grad():
+         query_embedding = clip_model.get_image_features(**inputs)
+     query_embedding = query_embedding[0].cpu().numpy().tolist()
+     result = image_index.query(vector=query_embedding, top_k=top_k, include_metadata=True)
+     return result['matches']
+
+ def prepare_context_from_matches(text_matches, image_matches):
+     context = ""
+     if text_matches:
+         context += "TEXTUAL INFORMATION:\n"
+         for match in text_matches:
+             content = match['metadata'].get('content', '')
+             page = match['metadata'].get('page', 'N/A')
+             context += f"[Page {page}] {content}\n"
+     if image_matches:
+         context += "IMAGE INFORMATION:\n"
+         for match in image_matches:
+             page = match['metadata'].get('page', 'N/A')
+             context += f"[Image extracted from Page {page}]\n"
+     return context.strip()
+
+ def generate_final_answer(context, question):
+     if len(context.split()) < 20:
+         return "Not enough detailed information retrieved to answer properly."
+     prompt = f"""
+ You are a financial expert assistant. Answer ONLY based on the context provided below.
+ Expand financial abbreviations (e.g., EPS → Earnings Per Share) and explain in full sentences.
+ Provide at least 3 complete sentences.
+
+ CONTEXT:
+ {context}
+
+ QUESTION:
+ {question}
+
+ FINAL ANSWER:
+ """
+     # max_new_tokens lifts the pipeline's short default generation length.
+     output = rag_pipeline(prompt, max_new_tokens=256)[0]['generated_text']
+     return output.strip()
+
+ # ======================================
+ # 4. Streamlit Web App
+ # ======================================
+ st.set_page_config(page_title="Multimodal RAG Assistant", page_icon="🤖", layout="centered")
+ st.title("📚 Multimodal RAG Assistant")
+
+ st.write("Ask a question based on uploaded PDFs, or upload a relevant image:")
+
+ # Input options
+ user_query = st.text_input("Enter your question:")
+ uploaded_image = st.file_uploader("Or upload an image:", type=["png", "jpg", "jpeg"])
+
+ if st.button("Submit"):
+     if user_query:
+         text_matches = search_text_index(user_query)
+         image_matches = []
+     elif uploaded_image:
+         img = Image.open(uploaded_image)
+         text_matches = []
+         image_matches = search_image_index(img)
+     else:
+         st.warning("Please either enter a question or upload an image.")
+         st.stop()
+
+     # Build context
+     context = prepare_context_from_matches(text_matches, image_matches)
+
+     # Generate final answer
+     answer = generate_final_answer(context, user_query if user_query else "Describe this image.")
+
+     # Show result
+     st.success("✅ Answer:")
+     st.write(answer)
+
+     # Show matched chunks
+     with st.expander("🔎 View Retrieved Context"):
+         st.text(context)
+
+ st.sidebar.info("Built with Pinecone + FLAN-T5-Large + Streamlit 🚀")
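
For reviewers: a minimal, self-contained way to check the context-building step offline. The helper below is copied from app.py, and the match payloads are hypothetical stand-ins for Pinecone query results, so the sketch runs without Pinecone, Streamlit, or any model downloads:

# sanity_check.py - exercises the formatting logic of
# prepare_context_from_matches() from app.py, copied here so the
# check needs no external services. Match payloads are hypothetical.

def prepare_context_from_matches(text_matches, image_matches):
    context = ""
    if text_matches:
        context += "TEXTUAL INFORMATION:\n"
        for match in text_matches:
            content = match['metadata'].get('content', '')
            page = match['metadata'].get('page', 'N/A')
            context += f"[Page {page}] {content}\n"
    if image_matches:
        context += "IMAGE INFORMATION:\n"
        for match in image_matches:
            page = match['metadata'].get('page', 'N/A')
            context += f"[Image extracted from Page {page}]\n"
    return context.strip()

text_matches = [{'metadata': {'content': 'EPS rose to $1.42 in Q4.', 'page': 12}}]
image_matches = [{'metadata': {'page': 14}}]
print(prepare_context_from_matches(text_matches, image_matches))
# Expected output:
# TEXTUAL INFORMATION:
# [Page 12] EPS rose to $1.42 in Q4.
# IMAGE INFORMATION:
# [Image extracted from Page 14]

The printed context is exactly the string that generate_final_answer() embeds in its prompt.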
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ streamlit>=1.24.0
+ torch>=2.0.0
+ transformers>=4.30.0
+ sentence-transformers>=2.2.2
+ pinecone>=3.0.0
+ Pillow>=9.5.0
+ requests>=2.31.0
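
To try the commit locally (a sketch of the usual flow, assuming a Pinecone project that already contains the rag-text-index and rag-image-index indexes): install the dependencies with pip install -r requirements.txt, set the PINECONE_API_KEY environment variable, then launch with streamlit run app.py. On a Hugging Face Space, the same key can be stored as a repository secret, which the Space exposes to app.py as an environment variable.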