RaghuCourage9605 committed on
Commit
511a3eb
·
verified ·
1 Parent(s): a44bd4f

Upload 4 files

Browse files
pages/1_Document_QA.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain.schema import Document
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain_community.document_loaders import (
5
+ PythonLoader,
6
+ PyMuPDFLoader,
7
+ WikipediaLoader,
8
+ )
9
+ from langchain.vectorstores import FAISS
10
+ from langchain_community.docstore import InMemoryDocstore
11
+ from langchain_huggingface import HuggingFaceEmbeddings
12
+ from langchain_cerebras import ChatCerebras
13
+ from langchain_mistralai import ChatMistralAI
14
+ from langchain_core.messages import HumanMessage
15
+ from langchain_google_genai import ChatGoogleGenerativeAI
16
+ from langchain.prompts import ChatPromptTemplate
17
+ from langchain.schema import StrOutputParser
18
+ from uuid import uuid4
19
+ import numpy as np
20
+ import faiss
21
+ import whisper
22
+ import torch
23
+ import os
24
+ from dotenv import load_dotenv
25
+ import logging
26
+ import base64
27
+ import asyncio
28
+ from concurrent.futures import ThreadPoolExecutor
29
+
30
# Load variables from a local .env file before any key lookup happens.
# The two sibling pages do this as well; without it the os.getenv() calls
# below return None when this page is the first one opened in a session.
# NOTE(review): ChatMistralAI is given no api_key here, so it presumably
# takes its key from the environment too - confirm.
load_dotenv()

# Streamlit re-executes this script on every interaction; session_state
# survives those reruns, so the chat clients are built only once.
if "models" not in st.session_state:
    st.session_state.models = {
        "Gemini": ChatGoogleGenerativeAI(
            model="gemini-2.0-flash-exp",
            temperature=0.8,
            verbose=True,
            api_key=os.getenv("GOOGLE_AI_STUDIO_API_KEY"),
        ),
        "Mistral": ChatMistralAI(
            model_name="open-mistral-nemo",
            temperature=0.8,
            verbose=True,
        ),
        "Llama": ChatCerebras(
            model="llama-3.3-70b",
            temperature=0.8,
            verbose=True,
            api_key=os.getenv("CEREBRAS_API_KEY"),
        ),
    }

# Sentence-embedding model, created once per session and run on CPU.
if "embeddings" not in st.session_state:
    model_name = "sentence-transformers/all-mpnet-base-v2"
    model_kwargs = {'device': 'cpu'}
    encode_kwargs = {'normalize_embeddings': False}
    st.session_state.embeddings = HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
    )

st.header(" 1. 🗂️ ✨ Document Question Answering")
st.write("""
In this section, you can upload a document files and query its content for answers.
The system will process the document and allow you to ask specific questions about the text in the document.

Here’s a list of the different types of document files for Chatting with your Doc:

- Text Files (.txt)
- PDF Files (.pdf)
- Word Documents (.doc, .docx)
- ArXiV Papers (.pdf)
""")

# NOTE(review): the text above advertises .txt and .doc, but the uploader
# only accepts pdf/docx - align one with the other once processing exists.
uploaded_doc = st.file_uploader("Upload the required documents (.pdf,.docx,)",type=[".pdf","docx"])
74
+
75
+
76
+
pages/2_Image_QA.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain.schema import Document
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.vectorstores import FAISS
5
+ from langchain_community.docstore import InMemoryDocstore
6
+ from langchain_huggingface import HuggingFaceEmbeddings
7
+ from langchain_core.messages import HumanMessage
8
+ from langchain_cerebras import ChatCerebras
9
+ from langchain_mistralai import ChatMistralAI
10
+ from langchain_google_genai import ChatGoogleGenerativeAI
11
+ from langchain.prompts import ChatPromptTemplate
12
+ from langchain.schema import StrOutputParser
13
+ from uuid import uuid4
14
+ import faiss
15
+ import os
16
+ from dotenv import load_dotenv
17
+ import logging
18
+ import httpx
19
+ import base64
20
+ import asyncio
21
+
22
# Read a local .env file into the process environment, then configure root
# logging at INFO level and create the logger used by this page.
load_dotenv()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
26
+
27
async def async_invoke_chain(chain, input_data):
    """Run a blocking ``chain.invoke(input_data)`` call off the event loop.

    Args:
        chain: Any object exposing a synchronous ``invoke`` method.
        input_data: Passed as the single positional argument to
            ``chain.invoke``.

    Returns:
        Whatever ``chain.invoke(input_data)`` returns.
    """
    # A loop is always running inside a coroutine; get_running_loop() says so
    # directly instead of relying on get_event_loop()'s legacy fallbacks.
    loop = asyncio.get_running_loop()
    # None selects the loop's default executor.
    return await loop.run_in_executor(None, chain.invoke, input_data)
31
+
32
# Per-session state. Streamlit re-runs this script on each interaction, so
# every object below is created only while its key is still missing.
if "messages" not in st.session_state:
    st.session_state.messages = []

if "models" not in st.session_state:
    gemini_client = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash-exp",
        temperature=0.8,
        verbose=True,
        api_key=os.getenv("GOOGLE_AI_STUDIO_API_KEY"),
    )
    mistral_client = ChatMistralAI(
        model_name="open-mistral-nemo",
        temperature=0.8,
        verbose=True,
    )
    llama_client = ChatCerebras(
        model="llama-3.3-70b",
        temperature=0.8,
        verbose=True,
        api_key=os.getenv("CEREBRAS_API_KEY"),
    )
    st.session_state.models = {
        "Gemini": gemini_client,
        "Mistral": mistral_client,
        "Llama": llama_client,
    }

# Embedding model used to index and query the image description (CPU only).
if "embeddings" not in st.session_state:
    st.session_state.embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': False},
    )
67
+
68
st.header("📸📈📊 ֎ Image Content Analysis and Question Answering")

# Brief overview for image content analysis
description = """
Upload an image, and the AI will analyze its content and answer your questions.
It can interpret various types of images including:
- General imagery (objects, people, scenes)
- Diagrams, graphs, and data visualizations
- Scientific and medical images
- Text-based images (documents, screenshots)
"""

# Display the brief description
st.write(description)

# File upload and URL input
st.header("Upload Image for Question Answering")
uploaded_file = st.file_uploader("Upload an image (.jpeg, .jpg, .png, etc.):", type=["jpeg", "jpg", "png"])

st.header("Or Enter the Image URL :")
image_url = st.text_input("Enter the image URL")

image_data = None
# Used in the data: URL sent to the model; replaced below when the source
# reports its own type, so PNG input is no longer labelled as JPEG.
image_mime = "image/jpeg"

if uploaded_file:
    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
    # getvalue() returns the whole buffer regardless of the read position.
    image_data = base64.b64encode(uploaded_file.getvalue()).decode("utf-8")
    image_mime = uploaded_file.type or image_mime
elif image_url:
    try:
        with httpx.Client() as client:
            response = client.get(image_url)
            response.raise_for_status()
        st.image(response.content, caption="Image from URL", use_column_width=True)
        image_data = base64.b64encode(response.content).decode("utf-8")
        reported_type = response.headers.get("content-type", "").split(";")[0].strip()
        if reported_type.startswith("image/"):
            image_mime = reported_type
    except Exception as e:
        # Best-effort fetch: surface the problem in the UI and carry on
        # without an image.
        st.error(f"Error fetching image from URL: {e}")

if image_data:
    # Streamlit re-runs this script for every chat message. Describing the
    # image and indexing the description is only needed when the image
    # changes, so the retriever is kept in session_state next to the image
    # it was built from.
    needs_index = (
        "image_retriever" not in st.session_state
        or st.session_state.get("image_qa_source") != image_data
    )
    if needs_index:
        image_message = HumanMessage(content=[{
            "type": "text", "text": "Describe what is in the image in detail."
        }, {
            "type": "image_url", "image_url": {"url": f"data:{image_mime};base64,{image_data}"}
        }])

        # Generate the description the QA step retrieves from.
        analysis = asyncio.run(async_invoke_chain(st.session_state.models["Gemini"], [image_message]))
        knowledge = [Document(page_content=analysis.content)]

        # Split text into chunks for indexing. `separators` must be a list:
        # a bare string is walked one character at a time.
        text_splitter = RecursiveCharacterTextSplitter(
            separators=["\n\n", "\n", " ", ""], chunk_size=1500, chunk_overlap=200
        )
        chunks = text_splitter.split_documents(knowledge)

        # Flat L2 index sized to the embedding dimension, which is probed
        # with a throwaway query.
        index = faiss.IndexFlatL2(len(st.session_state.embeddings.embed_query("hello world")))

        # Create FAISS vector store for document retrieval
        vector_store = FAISS(
            embedding_function=st.session_state.embeddings,
            index=index,
            docstore=InMemoryDocstore(),
            index_to_docstore_id={},
        )

        # add_documents() records the index-position -> id mapping itself,
        # so no manual update of index_to_docstore_id is needed afterwards.
        if chunks:
            ids = [str(uuid4()) for _ in chunks]
            vector_store.add_documents(documents=chunks, ids=ids)

        st.session_state.image_retriever = vector_store.as_retriever(
            search_type="similarity", search_kwargs={"k": 6}
        )
        # Recorded last so a failed build is retried on the next rerun.
        st.session_state.image_qa_source = image_data

    image_retriever = st.session_state.image_retriever

    def get_retrieved_context(query):
        """Return the retrieved chunks for ``query`` joined by newlines."""
        retrieved_documents = image_retriever.get_relevant_documents(query)
        return "\n".join(doc.page_content for doc in retrieved_documents)

    # User query for image QA
    user_input = st.chat_input("Ask a question about the image:")

    prompt = ChatPromptTemplate.from_messages([(
        "system", "You are an expert in analyzing images. Use the context: {context} to answer the query."
    ), ("human", "{question}")])

    if user_input:
        st.session_state.messages.append({"role": "user", "content": user_input})
        qa_chain = prompt | st.session_state.models["Mistral"] | StrOutputParser()
        context = get_retrieved_context(user_input)
        response_message = asyncio.run(async_invoke_chain(qa_chain, {"question": user_input, "context": context}))
        st.session_state.messages.append({"role": "assistant", "content": response_message})

    # Replay the whole conversation on every rerun so it stays visible.
    for entry in st.session_state.messages:
        st.chat_message(entry["role"]).markdown(entry["content"])
pages/3_Video_QA.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain.schema import Document
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.vectorstores import FAISS
5
+ from langchain_community.docstore import InMemoryDocstore
6
+ from langchain_huggingface import HuggingFaceEmbeddings
7
+ from langchain_cerebras import ChatCerebras
8
+ from langchain_mistralai import ChatMistralAI
9
+ from langchain_google_genai import ChatGoogleGenerativeAI
10
+ from langchain.prompts import ChatPromptTemplate
11
+ from langchain.schema import StrOutputParser
12
+ from uuid import uuid4
13
+ import faiss
14
+ import whisper
15
+ import yt_dlp
16
+ import torch
17
+ import os
18
+ from dotenv import load_dotenv
19
+ import logging
20
+ import asyncio
21
+
22
# Read a local .env file into the process environment, then configure root
# logging at INFO level and create the logger used by this page.
load_dotenv()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
25
+
26
+
27
async def async_invoke_chain(chain, input_data):
    """Run a blocking ``chain.invoke(input_data)`` call off the event loop.

    Args:
        chain: Any object exposing a synchronous ``invoke`` method.
        input_data: Passed as the single positional argument to
            ``chain.invoke``.

    Returns:
        Whatever ``chain.invoke(input_data)`` returns.
    """
    # A loop is always running inside a coroutine; get_running_loop() says so
    # directly instead of relying on get_event_loop()'s legacy fallbacks.
    loop = asyncio.get_running_loop()
    # None selects the loop's default executor.
    return await loop.run_in_executor(None, chain.invoke, input_data)
30
+
31
# Chat clients are built once per session; Streamlit re-runs this script on
# every interaction and session_state carries them across reruns.
if "models" not in st.session_state:
    gemini_client = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash-exp",
        temperature=0.8,
        verbose=True,
        api_key=os.getenv("GOOGLE_AI_STUDIO_API_KEY"),
    )
    mistral_client = ChatMistralAI(
        model_name="open-mistral-nemo",
        temperature=0.8,
        verbose=True,
    )
    llama_client = ChatCerebras(
        model="llama-3.3-70b",
        temperature=0.8,
        verbose=True,
        api_key=os.getenv("CEREBRAS_API_KEY"),
    )
    st.session_state.models = {
        "Gemini": gemini_client,
        "Mistral": mistral_client,
        "Llama": llama_client,
    }

# Initialize embeddings (CPU, vectors left un-normalised)
if "embeddings" not in st.session_state:
    st.session_state.embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": False},
    )

st.header("1. 🎬📽️🎞️֎ Video Question Answering ")
st.write("""
In this section, you can upload a video file or provide a YouTube URL to ask questions related to its content.
The system will process the video and provide relevant answers based on the transcription and analysis of the video content.
""")

# Conversation history shown by the chat section of this page.
if "messages" not in st.session_state:
    st.session_state.messages = []
67
+
68
# Function to transcribe video using whisper
async def process_video(video_path):
    """Transcribe the media file at ``video_path`` with Whisper's "small" model.

    Progress is reported through ``st.info`` / ``st.success``. The blocking
    ``transcribe`` call runs in the event loop's default executor.

    Args:
        video_path: Path of the file handed to ``model.transcribe``.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    st.info("Transcribing video...")
    # Load the weights directly on the target device instead of loading
    # first and moving the model afterwards.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = whisper.load_model("small", device=device)
    # A loop is always running inside a coroutine.
    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(None, model.transcribe, video_path)
    st.success("Transcription complete")
    return result["text"]
77
+
78
+
79
# Function to download video from YouTube using yt-dlp
def download_video(video_url, save_path="./"):
    """Download ``video_url`` with yt-dlp into ``save_path``.

    Args:
        video_url: URL handed to yt-dlp.
        save_path: Directory the file is written to; the file name follows
            the ``%(title)s.%(ext)s`` template.

    Returns:
        The path of the downloaded file, or ``None`` when the download
        failed. Failures are logged rather than raised, so callers that
        ignore the return value keep working as before.
    """
    ydl_opts = {
        "outtmpl": os.path.join(save_path, "%(title)s.%(ext)s"),  # Save path and file name
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(video_url, download=True)
            if not info:
                logger.error("yt-dlp returned no metadata for %s", video_url)
                return None
            # A playlist URL yields a container with "entries"; use the first.
            entries = [entry for entry in (info.get("entries") or []) if entry]
            if entries:
                info = entries[0]
            # After a merge or post-processing step the final file can differ
            # from the template-derived name, so prefer the reported path.
            requested = info.get("requested_downloads") or []
            final_path = requested[0].get("filepath") if requested else None
            if not final_path:
                final_path = ydl.prepare_filename(info)
    except Exception:
        # Top-level boundary for the download: keep the page alive, but
        # leave a traceback in the log instead of a bare print.
        logger.exception("Failed to download %s", video_url)
        return None
    logger.info("Download complete: %s", final_path)
    return final_path
90
+
91
+
92
# Streamlit UI
st.header("Upload Video 📤")
uploaded_file = st.file_uploader(
    "Upload a video file (.mp4, .webm, .mkv):", type=["mp4", "webm", "mkv"]
)

st.header("Provide Youtube Video URL 🔗")
uploaded_video_url = st.text_input("Enter the URL of the video you want to chat with")
st.warning(
    "Please note that processing a YouTube URL may take some time, depending on the video's length and the current load."
)


if st.button("Process Video"):
    if uploaded_video_url:
        save_path = "./downloads"
        os.makedirs(save_path, exist_ok=True)
        video_suffixes = (".mp4", ".webm", ".mkv")
        # Snapshot the directory first so a file left over from an earlier
        # run is not mistaken for the one requested now.
        files_before = set(os.listdir(save_path))
        downloaded = download_video(uploaded_video_url, save_path)  # Download the video from YouTube
        if downloaded and os.path.isfile(downloaded):
            video_path = downloaded
        else:
            # No usable path was returned: prefer a file that appeared during
            # this download, else fall back to any video already present.
            candidates = sorted(
                f for f in os.listdir(save_path) if f.endswith(video_suffixes)
            )
            fresh = [f for f in candidates if f not in files_before]
            chosen = next(iter(fresh or candidates), None)
            video_path = os.path.join(save_path, chosen) if chosen else None
        if video_path:
            # Fetch transcription text
            st.session_state.transcription = asyncio.run(process_video(video_path))
            st.success("Transcription complete!")
        else:
            st.error("No video file found after download.")
    elif uploaded_file:
        save_path = "./uploads"
        os.makedirs(save_path, exist_ok=True)
        # basename() keeps a crafted client-side name from escaping save_path.
        video_path = os.path.join(save_path, os.path.basename(uploaded_file.name))
        with open(video_path, "wb") as f:
            # getvalue() returns the whole buffer regardless of read position.
            f.write(uploaded_file.getvalue())
        # Fetch transcription text
        st.session_state.transcription = asyncio.run(process_video(video_path))
        st.success("Transcription complete!")
    else:
        st.error("Please upload a video file or provide a video URL.")
135
+
136
# Handle LLM response after transcription
if "transcription" in st.session_state:
    transcription = st.session_state.transcription

    # Streamlit re-runs this script for every chat message. The model call
    # and the index build are only needed when the transcription changes, so
    # the retriever is kept in session_state next to the text it came from.
    needs_index = (
        "video_retriever" not in st.session_state
        or st.session_state.get("video_qa_source") != transcription
    )
    if needs_index:
        # NOTE(review): the raw transcription is sent to the model with no
        # instruction, and the model's reply - not the transcription - is
        # what gets indexed. Confirm this is intended.
        response = asyncio.run(
            async_invoke_chain(st.session_state.models["Gemini"], transcription)
        )
        knowledge = [Document(page_content=response.content)]

        # `separators` must be a list: a bare string is walked one character
        # at a time.
        text_splitter = RecursiveCharacterTextSplitter(
            separators=["\n\n", "\n", " ", ""], chunk_size=1500, chunk_overlap=200
        )
        chunks = text_splitter.split_documents(knowledge)
        # Flat L2 index sized to the embedding dimension, which is probed
        # with a throwaway query.
        index = faiss.IndexFlatL2(
            len(st.session_state.embeddings.embed_query("hello world"))
        )

        # Create FAISS vector store for document retrieval
        vector_store = FAISS(
            embedding_function=st.session_state.embeddings,
            index=index,
            docstore=InMemoryDocstore(),
            index_to_docstore_id={},
        )

        # add_documents() records the index-position -> id mapping itself,
        # so no manual update of index_to_docstore_id is needed afterwards.
        if chunks:
            ids = [str(uuid4()) for _ in chunks]
            vector_store.add_documents(documents=chunks, ids=ids)

        st.session_state.video_retriever = vector_store.as_retriever(
            search_type="similarity", search_kwargs={"k": 4}
        )
        # Recorded last so a failed build is retried on the next rerun.
        st.session_state.video_qa_source = transcription

    video_retriever = st.session_state.video_retriever

    def get_retrieved_context(query):
        """Return the retrieved chunks for ``query`` joined by newlines."""
        retrieved_documents = video_retriever.get_relevant_documents(query)
        return "\n".join(doc.page_content for doc in retrieved_documents)

    # User query for video QA
    user_input = st.chat_input("Ask a question about the video:")
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are an expert in analyzing videos. Use the context: {context} to answer the query.",
            ),
            ("human", "{question}"),
        ]
    )

    if user_input:
        st.session_state.messages.append({"role": "user", "content": user_input})
        qa_chain = prompt | st.session_state.models["Mistral"] | StrOutputParser()
        context = get_retrieved_context(user_input)
        response_message = asyncio.run(
            async_invoke_chain(qa_chain, {"question": user_input, "context": context})
        )
        st.session_state.messages.append(
            {"role": "assistant", "content": response_message}
        )

    # Replay the whole conversation on every rerun so it stays visible.
    for entry in st.session_state.messages:
        st.chat_message(entry["role"]).markdown(entry["content"])
else:
    st.error("No transcription available. Please upload or process a video first.")
pages/4_Audio_QA.py ADDED
File without changes