Al1Abdullah commited on
Commit
593f0ea
·
0 Parent(s):

Initial commit for Hugging Face Space

Browse files
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ PINECONE_API_KEY = "REDACTED_ROTATE_THIS_KEY"
+ # SECURITY: a live Pinecone API key was committed in this file. Rotate the key
+ # immediately and supply it via the Space's secrets / environment settings;
+ # never commit .env files containing credentials to version control.
app.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ from src.ollama_chain import OllamaChain, OllamaRAGChain
4
+ from src.llama_cpp_chains import LlamaChain
5
+ from src.pdf_handler import extract_pdf
6
+ from src.vqa import answer_visual_question
7
+ from src.audio_processor import AudioProcessor
8
+ from langchain_community.chat_message_histories import StreamlitChatMessageHistory
9
+
10
+ from dotenv import load_dotenv
11
+ import os
12
+
13
+ load_dotenv()
14
+
15
+ audio_processor = AudioProcessor()
16
+
17
+
18
@st.cache_resource
def load_chain(_chat_memory):
    """Build (and cache) the LLM chain for this session.

    Returns a RAG-enabled chain when the 'PDF Chat' toggle is on,
    otherwise a plain conversational chain. The leading underscore on
    _chat_memory keeps Streamlit from trying to hash the argument.
    """
    chain_cls = OllamaRAGChain if st.session_state.pdf_chat else OllamaChain
    return chain_cls(_chat_memory)
24
+
25
+
26
def file_uploader_change():
    """React to PDF uploads: invalidate the cached chain when needed and
    flag the knowledge base as stale so it gets re-indexed."""
    state = st.session_state
    if not state.uploaded_file:
        # All files removed: just drop the cached chain.
        clear_cache()
        return
    if not state.pdf_chat:
        clear_cache()
    state.knowledge_change = True
33
+
34
+
35
def toggle_pdf_chat_change():
    """Rebuild the chain when 'PDF Chat' flips; if PDFs are already
    uploaded, also mark the knowledge base for re-indexing."""
    clear_cache()
    state = st.session_state
    if state.pdf_chat and state.uploaded_file:
        state.knowledge_change = True
39
+
40
+
41
def clear_input_field():
    # Copy the text-input widget's value into user_question so the message
    # survives the rerun that Streamlit performs after the on_change callback.
    st.session_state.user_question = st.session_state.user_input
43
+
44
+
45
def set_send_input():
    """Flag that the user submitted a message (Enter in the text box) and
    stash the typed text for processing on the next rerun."""
    st.session_state.send_input = True
    clear_input_field()
48
+
49
+
50
def clear_cache():
    # Drop every @st.cache_resource entry, forcing load_chain() to rebuild
    # the chain on the next call.
    st.cache_resource.clear()
52
+
53
+
54
def initial_session_state():
    """Seed the session-state keys this app relies on with their defaults."""
    st.session_state.send_input = False
    st.session_state.knowledge_change = False
    st.session_state.user_question = ""
58
+
59
+
60
def main():
    """Streamlit entry point.

    Renders the chat UI, routes user input (text, PDF-RAG, image VQA, or
    audio transcription) to the appropriate chain, and plays the answer
    back through gTTS.
    """
    st.title('OVERKILL LLM')
    os.makedirs('./.cache/temp_files', exist_ok=True)  # Ensure temp folder exists
    chat_container = st.container()

    # Sidebar: mode toggle and file uploaders.
    st.sidebar.toggle('PDF Chat', value=False, key='pdf_chat', on_change=toggle_pdf_chat_change)
    uploaded_pdf = st.sidebar.file_uploader(
        'Upload your pdf files',
        type='pdf',
        accept_multiple_files=True,
        key='uploaded_file',
        on_change=file_uploader_change
    )

    uploaded_image = st.sidebar.file_uploader('Upload Images', type=['jpg', 'jpeg', 'png'], key='uploaded_image')
    st.sidebar.file_uploader('Upload Audio', type=['wav', 'mp3'], key='uploaded_audio')
    uploaded_audio = st.session_state.get('uploaded_audio')

    # Optional reset
    if st.sidebar.button("🔄 Reset Chat"):
        st.session_state.clear()
        # BUG FIX: st.experimental_rerun() was removed in modern Streamlit;
        # requirements.txt pins streamlit>=1.32, so use the stable st.rerun().
        st.rerun()

    # Input widgets.
    user_input = st.text_input('Message OVERKILL', key='user_input', on_change=set_send_input)
    send_button = st.button('Send', key='send_button')

    # First-run session setup.
    if 'send_input' not in st.session_state or 'user_question' not in st.session_state:
        initial_session_state()

    chat_history = StreamlitChatMessageHistory(key='history')

    # Replay the stored conversation.
    with chat_container:
        for msg in chat_history.messages:
            st.chat_message(msg.type).write(msg.content)

    try:
        llm_chain = load_chain(chat_history)
    except Exception as e:
        st.error(f"Error loading LLM chain: {e}")
        return

    # Re-index uploaded PDFs when the uploader/toggle callbacks flagged a change.
    if st.session_state.knowledge_change:
        with st.spinner('Updating knowledge base'):
            try:
                llm_chain.update_chain(uploaded_pdf)
                st.session_state.knowledge_change = False
            except Exception as e:
                st.error(f"Error updating knowledge base: {e}")
                return

    if (send_button or st.session_state.send_input) and st.session_state.user_question != "":
        with chat_container:
            st.chat_message('user').write(st.session_state.user_question)

            try:
                if uploaded_image:
                    # Image present: answer as a visual question instead.
                    image_path = os.path.join('./.cache/temp_files', uploaded_image.name)
                    with open(image_path, 'wb') as f:
                        f.write(uploaded_image.getvalue())
                    llm_response = answer_visual_question(image_path, st.session_state.user_question)

                elif uploaded_audio:
                    # Audio present: transcribe it and use the transcript as the question.
                    audio_path = os.path.join('./.cache/temp_files', uploaded_audio.name)
                    with open(audio_path, 'wb') as f:
                        f.write(uploaded_audio.getvalue())
                    st.write(f"Processing audio file: {audio_path}")
                    question = audio_processor.audio_to_text(audio_path)
                    st.write(f"Converted audio to text: {question}")
                    llm_response = llm_chain.run(user_input=question)

                else:
                    llm_response = llm_chain.run(user_input=st.session_state.user_question)

                st.session_state.user_question = ""
                st.chat_message('ai').write(llm_response)

                # BUG FIX: read the TTS file through a context manager so the
                # handle is closed (the old code leaked an open file object).
                audio_file = audio_processor.text_to_speech(llm_response)
                with open(audio_file, 'rb') as af:
                    audio_bytes = af.read()
                st.audio(audio_bytes, format='audio/mp3')

            except Exception as e:
                st.error(f"Error during chat: {e}")
145
+
146
+
147
if __name__ == '__main__':
    # Entry point when executed directly (e.g. `streamlit run app.py`).
    main()
config.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ chat_model:
2
+ 'model': "llama3:latest"
3
+ 'temperature': 0.75
4
+ 'num_gpu': 1
5
+ vector_database:
6
+ chroma:
7
+ chat_session_path: './chat_session/'
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ poppler-utils
2
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate
2
+ torch
3
+ transformers
4
+ torchaudio
5
+ langchain
6
+ langchain-community
7
+ langchain-pinecone
8
+ langchain-chroma
9
+ pinecone-client
10
+ sentence-transformers
11
+ pypdf
12
+ PyMuPDF
13
+ pdf2image
14
+ pillow
15
+ opencv-python
16
+ ffmpeg-python
17
+ gtts
18
+ pydub
19
+ speechrecognition
20
+ streamlit>=1.32
21
+ google-generativeai
22
+ requests
23
+ python-dotenv
24
+ PyYAML
src/__pycache__/audio_processor.cpython-311.pyc ADDED
Binary file (3.15 kB). View file
 
src/__pycache__/llama_cpp_chains.cpython-311.pyc ADDED
Binary file (2.24 kB). View file
 
src/__pycache__/ollama_chain.cpython-311.pyc ADDED
Binary file (7.36 kB). View file
 
src/__pycache__/pdf_handler.cpython-311.pyc ADDED
Binary file (2.57 kB). View file
 
src/__pycache__/utils.cpython-311.pyc ADDED
Binary file (626 Bytes). View file
 
src/__pycache__/vectorstore.cpython-311.pyc ADDED
Binary file (4.94 kB). View file
 
src/__pycache__/vqa.cpython-311.pyc ADDED
Binary file (2.93 kB). View file
 
src/audio_processor.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import speech_recognition as sr
2
+ from gtts import gTTS
3
+ import tempfile
4
+ from pydub import AudioSegment
5
+
6
class AudioProcessor:
    """Speech helpers: speech-to-text via the Google Web Speech API and
    text-to-speech via gTTS."""

    def __init__(self):
        self.recognizer = sr.Recognizer()

    def audio_to_text(self, audio_file):
        """Process an uploaded audio file and convert it to text.

        The input is first transcoded to WAV (SpeechRecognition only reads
        WAV/AIFF/FLAC). Returns the transcript, or an explanatory message
        string on failure — callers treat errors as best-effort text.
        """
        import os  # local import keeps this module's import surface unchanged

        wav_path = None
        try:
            # Convert the source audio to WAV for the recognizer.
            audio = AudioSegment.from_file(audio_file)
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
                wav_path = wav_file.name
            audio.export(wav_path, format="wav")
            print(f"Converted audio to WAV: {wav_path}")  # Debug statement

            with sr.AudioFile(wav_path) as source:
                audio = self.recognizer.record(source)
            try:
                text = self.recognizer.recognize_google(audio)
            except sr.UnknownValueError:
                text = "Could not understand audio"
            except sr.RequestError:
                text = "Could not request results"
            print(f"Recognized text: {text}")  # Debug statement
            return text
        except Exception as e:
            print(f"Error processing audio file: {e}")  # Debug statement
            return f"Error processing audio file: {e}"
        finally:
            # BUG FIX: the old code created the temp WAV with delete=False and
            # never removed it, leaking one file per call.
            if wav_path and os.path.exists(wav_path):
                os.remove(wav_path)

    def text_to_speech(self, text):
        """Convert text to speech using gTTS and save as a .mp3 file.

        Returns the path to the generated file; the caller is responsible
        for deleting it once the audio has been consumed.
        """
        tts = gTTS(text)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            tts.save(temp_audio.name)
        return temp_audio.name
src/llama_cpp_chains.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.llms import LlamaCpp
2
+ from langchain.prompts import PromptTemplate
3
+ from langchain.memory import ConversationBufferWindowMemory
4
+ from langchain_core.output_parsers import StrOutputParser
5
+ from langchain_core.runnables import RunnableSequence
6
+
7
+ from src.utils import load_config
8
+
9
+
10
class LlamaChain:
    """Plain conversational chain backed by a local llama.cpp model.

    Mirrors OllamaChain's interface: construct with a LangChain chat-message
    store, then call run(user_input) to get the assistant's reply as a string.
    """

    def __init__(self, chat_memory) -> None:
        # Llama-3 chat template: system preamble, then prior turns and the
        # new question in the user slot.
        prompt = PromptTemplate(
            template="""<|begin_of_text|>
            <|start_header_id|>system<|end_header_id|>
            You are a helpful and knowledgeable AI assistant.
            <|eot_id|>
            <|start_header_id|>user<|end_header_id|>
            Previous conversation={chat_history}
            Question: {input}
            Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
            input_variables=['chat_history', 'input']
        )

        # Keep only the last k=3 exchanges to bound the prompt size.
        self.memory = ConversationBufferWindowMemory(
            memory_key='chat_history',
            chat_memory=chat_memory,
            k=3,
            return_messages=True
        )

        config = load_config()
        llm = LlamaCpp(**config['chat_model'])

        # BUG FIX: the old code built
        #   RunnableSequence(prompt | llm | self.memory | StrOutputParser())
        # but a ConversationBufferWindowMemory is not a Runnable, so the
        # pipeline failed, and run() then indexed the parser's *string*
        # output with ['text'].  Compose prompt -> llm -> parser and handle
        # memory explicitly in run() instead.
        self.llm_chain = prompt | llm | StrOutputParser()

    def run(self, user_input):
        """Generate a reply to *user_input*, reading and updating chat memory."""
        history = self.memory.load_memory_variables({})['chat_history']
        response = self.llm_chain.invoke({'chat_history': history, 'input': user_input})
        self.memory.save_context({'input': user_input}, {'output': response})
        return response
src/ollama_chain.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.llms import Ollama
2
+ from langchain.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
3
+ from langchain.memory import ConversationBufferWindowMemory
4
+ from langchain.chains import LLMChain, create_history_aware_retriever, create_retrieval_chain
5
+ from langchain.chains.combine_documents import create_stuff_documents_chain
6
+ from langchain_core.output_parsers import StrOutputParser
7
+ from langchain_core.runnables.history import RunnableWithMessageHistory
8
+ from langchain.schema import Document
9
+
10
+ from src.utils import load_config
11
+ from src.vectorstore import VectorDB
12
+
13
+
14
def format_docs(docs: list[Document]):
    """Concatenate retrieved documents' text, separated by blank lines."""
    parts = [doc.page_content for doc in docs]
    return '\n\n'.join(parts)
16
+
17
+
18
class OllamaChain:
    """Plain (non-RAG) conversational chain over a local Ollama model.

    Wraps a LangChain LLMChain with a sliding-window memory bound to the
    chat-message store passed in as *chat_memory*.
    """

    def __init__(self, chat_memory) -> None:
        # Llama-3 chat template: system preamble, then prior turns and the
        # new question in the user slot.
        prompt = PromptTemplate(
            template="""<|begin_of_text|>
            <|start_header_id|>system<|end_header_id|>
            You are a honest and unbiased AI assistant
            <|eot_id|>
            <|start_header_id|>user<|end_header_id|>
            Previous conversation={chat_history}
            Question: {input}
            Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
            input_variables=['chat_history', 'input']
        )

        # Keep only the last k=3 exchanges so the prompt stays bounded.
        self.memory = ConversationBufferWindowMemory(
            memory_key='chat_history',
            chat_memory=chat_memory,
            k=3,
            return_messages=True
        )

        # Model name / temperature / GPU count come from config.yaml.
        config = load_config()
        llm = Ollama(**config['chat_model'])
        # llm = Ollama(model='llama3:latest', temperature=0.75, num_gpu=1)

        # NOTE(review): LLMChain is deprecated in recent LangChain releases;
        # kept as-is here because memory integration relies on it.
        self.llm_chain = LLMChain(prompt=prompt, llm=llm, memory=self.memory, output_parser=StrOutputParser())
        # runnable = prompt | llm

    def run(self, user_input):
        """Return the model's reply text for *user_input*.

        Memory read/write is handled internally by LLMChain; the invoke
        result is a dict whose 'text' key holds the parsed output.
        """
        response = self.llm_chain.invoke(user_input)

        return response['text']
50
+
51
+
52
class OllamaRAGChain:
    """Retrieval-augmented conversational chain over a local Ollama model.

    Pipeline: a history-aware retriever first rewrites the user's question
    into a standalone form, retrieves matching chunks from the vector DB,
    then a stuff-documents QA chain answers from that context.  Chat history
    is threaded through RunnableWithMessageHistory using *chat_memory*.
    """

    def __init__(self, chat_memory, uploaded_file=None):
        # initialize vector db using config
        from src.utils import load_config  # NOTE(review): redundant — load_config is already imported at module level
        config = load_config()
        vector_db_config = config.get('vector_database', {})
        # Pinecone is preferred when configured; otherwise fall back to Chroma.
        db_name = 'pinecone' if 'pinecone' in vector_db_config else 'chroma'
        index_name = 'default'
        self.vector_db = VectorDB(db_name, index_name)
        if uploaded_file:
            self.update_knowledge_base(uploaded_file)

        # initialize llm (NOTE(review): load_config is called a second time here)
        config = load_config()
        self.llm = Ollama(**config['chat_model'])

        # initialize memory
        self.chat_memory = chat_memory

        # initialize sub chain with history message
        contextual_q_system_prompt = """Given a chat history and the latest user question which might refer to context \
        in the chat history. Check if the user's question refers to the chat history or not. If does, formulate a \
        standalone question which is incorporated from the latest question and history and can be understood without \
        the chat history.
        Do NOT answer the question, just reformulate it if needed and otherwise return it as is."""

        self.contextual_q_prompt = ChatPromptTemplate.from_messages(
            [
                ('system', contextual_q_system_prompt),
                MessagesPlaceholder('chat_history'),
                ('human', '{input}'),
            ]
        )

        # Rewrites the question using chat history before hitting the retriever.
        self.history_aware_retriever = create_history_aware_retriever(
            self.llm, self.vector_db.as_retriever(), self.contextual_q_prompt
        )

        # initialize qa chain: answers strictly from the retrieved {context}.
        qa_system_prompt = """You are an assistant for question-answering tasks. Use the following pieces of retrieved\
        context to answer the question. If you don't know the answer, just say that you don't know.
        Context: {context}"""
        qa_prompt = ChatPromptTemplate.from_messages(
            [
                ('system', qa_system_prompt),
                MessagesPlaceholder('chat_history'),
                ('human', '{input}'),
            ]
        )

        self.question_answer_chain = create_stuff_documents_chain(self.llm, qa_prompt)

        rag_chain = create_retrieval_chain(self.history_aware_retriever, self.question_answer_chain)

        # Single shared history: the session_id argument is ignored and the
        # same message store is returned for every session.
        self.conversation_rag_chain = RunnableWithMessageHistory(
            rag_chain,
            lambda session_id: chat_memory,
            input_messages_key='input',
            history_messages_key='chat_history',
            output_messages_key='answer'
        )

    def run(self, user_input):
        """Answer *user_input* with retrieval context; returns the answer text."""
        # session_id is required by RunnableWithMessageHistory but unused
        # (the history factory above ignores it).
        config = {"configurable": {"session_id": "any"}}
        response = self.conversation_rag_chain.invoke({'input': user_input}, config)

        return response['answer']

    def update_chain(self, uploaded_pdf):
        """Re-index *uploaded_pdf* and rebuild the retrieval pipeline on top
        of the refreshed vector store."""
        self.update_knowledge_base(uploaded_pdf)
        self.history_aware_retriever = create_history_aware_retriever(
            self.llm, self.vector_db.as_retriever(), self.contextual_q_prompt
        )
        self.conversation_rag_chain = RunnableWithMessageHistory(
            create_retrieval_chain(self.history_aware_retriever, self.question_answer_chain),
            lambda session_id: self.chat_memory,
            input_messages_key='input',
            history_messages_key='chat_history',
            output_messages_key='answer'
        )

    def update_knowledge_base(self, uploaded_pdf):
        """Push uploaded PDF file(s) into the vector store index."""
        self.vector_db.index(uploaded_pdf)
src/pdf_handler.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
4
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
5
+ from langchain.schema.document import Document
6
+
7
+
8
def create_cache_dir(directory=None):
    """Ensure the cache directory exists and return its path.

    Defaults to './.cache' when *directory* is falsy.

    BUG FIX: the old body always created './.cache' regardless of the
    *directory* argument, so callers passing a custom path got back a
    path that was never created.
    """
    if not directory:
        directory = './.cache'

    os.makedirs(directory, exist_ok=True)
    return directory
14
+
15
+
16
def load_pdf(file_path):
    """Load a single PDF into LangChain documents (one per page)."""
    return PyPDFLoader(file_path).load()
20
+
21
+
22
def load_pdf_directory(directory):
    """Load every PDF under *directory* into LangChain documents."""
    return PyPDFDirectoryLoader(directory).load()
25
+
26
+
27
def split_pdf(pdfs: list[Document]):
    """Chunk documents into 512-character pieces with 64 characters of overlap."""
    chunker = RecursiveCharacterTextSplitter(
        chunk_size=512,
        chunk_overlap=64,
        length_function=len,
        is_separator_regex=False,
    )
    return chunker.split_documents(pdfs)
36
+
37
+
38
def extract_pdf(uploaded_pdf):
    """Persist uploaded PDF file(s) into the temp cache and return that directory.

    Accepts a single Streamlit UploadedFile or a list of them.
    """
    temp_dir = os.path.join(create_cache_dir(), 'temp_files')
    os.makedirs(temp_dir, exist_ok=True)
    # Normalise to a list so single uploads and multi-uploads share one code path.
    files = uploaded_pdf if isinstance(uploaded_pdf, list) else [uploaded_pdf]
    for uploaded in files:
        target = os.path.join(temp_dir, uploaded.name)
        with open(target, 'wb') as out:
            out.write(uploaded.getvalue())
    return temp_dir
src/utils.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import yaml
3
+
4
+
5
def load_config():
    """Parse ./config.yaml (relative to the working directory) into a dict."""
    with open('./config.yaml', 'r') as stream:
        return yaml.safe_load(stream)
src/vectorstore.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pinecone import Pinecone, ServerlessSpec, PodSpec
2
+ from langchain_pinecone import PineconeVectorStore
3
+ from langchain_chroma import Chroma
4
+ from langchain_community.embeddings import OllamaEmbeddings
5
+ from langchain.indexes import SQLRecordManager, index
6
+
7
+ from src.pdf_handler import extract_pdf, load_pdf_directory, split_pdf
8
+ from src.utils import load_config
9
+
10
+ import os
11
+ import shutil
12
+ from dotenv import load_dotenv
13
+
14
+ load_dotenv()
15
+
16
+
17
def setup_pinecone(index_name, embedding_model, embedding_dim, metric='cosine', use_serverless=True):
    """Create a fresh Pinecone index and wrap it in a LangChain vector store.

    WARNING: if an index named *index_name* already exists it is DELETED and
    recreated, discarding all previously stored vectors.

    Args:
        index_name: Name of the Pinecone index to (re)create.
        embedding_model: LangChain embeddings object used by the store.
        embedding_dim: Dimensionality of the embedding vectors.
        metric: Similarity metric for the index (default 'cosine').
        use_serverless: Use a serverless spec (aws / us-east-1) instead of pods.
    """
    # API key is read from the environment (.env loaded via load_dotenv above).
    pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))
    if use_serverless:
        spec = ServerlessSpec(cloud='aws', region='us-east-1')
    else:
        spec = PodSpec()

    # Drop-and-recreate so repeated runs always start from an empty index.
    if index_name in pc.list_indexes().names():
        pc.delete_index(index_name)

    pc.create_index(
        index_name,
        dimension=embedding_dim,
        metric=metric,
        spec=spec
    )

    db = PineconeVectorStore(index_name=index_name, embedding=embedding_model)
    return db
36
+
37
+
38
def setup_chroma(index_name, embedding_model, persist_directory=None):
    """Create (or reopen) a persistent Chroma collection named *index_name*."""
    target = persist_directory or './.cache/database'
    os.makedirs(target, exist_ok=True)
    return Chroma(index_name, embedding_function=embedding_model, persist_directory=target)
46
+
47
+
48
class VectorDB:
    """Facade over either a Pinecone or a Chroma vector store.

    Handles store construction, incremental (de-duplicated) indexing of
    uploaded PDFs via LangChain's SQLRecordManager, and retriever access.
    """

    def __init__(self, db_name=None, index_name=None, cache_dir=None):
        config = load_config()
        vector_db_config = config.get('vector_database', {})
        # Determine DB type from config, fallback to argument or chroma
        if db_name is None:
            db_name = 'pinecone' if 'pinecone' in vector_db_config else 'chroma'
        if index_name is None:
            index_name = 'default'
        # nomic-embed-text embeddings; 768 below must match this model's output
        # dimension — TODO confirm against the deployed Ollama model.
        embedding = OllamaEmbeddings(model='nomic-embed-text:latest', num_gpu=1)
        if not cache_dir:
            cache_dir = './.cache/database'
        self.cache_dir = cache_dir
        os.makedirs(self.cache_dir, exist_ok=True)
        if db_name == 'pinecone':
            if not os.environ.get('PINECONE_API_KEY'):
                raise ValueError("PINECONE_API_KEY environment variable is not set. Please set it in your .env file or environment.")
            self.vectorstore = setup_pinecone(index_name, embedding, 768, 'cosine')
        else:
            self.vectorstore = setup_chroma(index_name, embedding, self.cache_dir)
        # The record manager tracks which chunks are already indexed so
        # re-indexing can de-duplicate / clean up stale entries.
        namespace = f'{db_name}/{index_name}'
        self.record_manager = SQLRecordManager(namespace,
                                               db_url=f'sqlite:///{self.cache_dir}/record_manager_cache.sql')
        self.record_manager.create_schema()

    def index(self, uploaded_file):
        """Extract, split, and index uploaded PDF file(s) into the store."""
        directory = extract_pdf(uploaded_file)
        docs = load_pdf_directory(directory)
        chunks = split_pdf(docs)

        # NOTE: this calls the module-level langchain `index()` helper, not
        # this method — inside the body, the bare name resolves to the global.
        index(
            docs_source=chunks,
            record_manager=self.record_manager,
            vector_store=self.vectorstore,
            cleanup='full',
            source_id_key='source'
        )

        # Remove the cached temp copies once they are indexed.
        for file in os.listdir(directory):
            os.remove(os.path.join(directory, file))

    def as_retriever(self):
        """Return a LangChain retriever view of the underlying store."""
        return self.vectorstore.as_retriever()
src/vqa.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ warnings.filterwarnings("ignore", category=UserWarning, module="transformers")
3
+ warnings.filterwarnings("ignore", category=UserWarning, module="torchaudio")
4
+
5
+ import requests
6
+ from PIL import Image
7
+ from transformers import BlipProcessor, BlipForQuestionAnswering, Wav2Vec2Processor, Wav2Vec2ForCTC
8
+ import os
9
+ import torchaudio
10
+
11
# Load BLIP VQA and Wav2Vec2 ASR weights once at import time (downloaded on
# first run, then served from the local Hugging Face cache).
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")

audio_processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
audio_model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
16
+
17
def answer_visual_question(image_path_or_url: str, question: str) -> str:
    """Answer *question* about an image given as a local path or an HTTP URL."""
    # Local file takes precedence; anything else is treated as a URL.
    if os.path.isfile(image_path_or_url):
        source = Image.open(image_path_or_url)
    else:
        response = requests.get(image_path_or_url, stream=True)
        source = Image.open(response.raw)
    raw_image = source.convert('RGB')

    encoded = processor(raw_image, question, return_tensors="pt")
    generated = model.generate(**encoded)
    return processor.decode(generated[0], skip_special_tokens=True)
26
+
27
def transcribe_audio(audio_path: str) -> str:
    """Transcribe a speech audio file with Wav2Vec2 and return the text.

    BUG FIX: the original body called torch.no_grad() and torch.argmax but
    `torch` was never imported in this module, so any call raised NameError.
    """
    import torch  # local import so the module's import surface is unchanged

    waveform, sample_rate = torchaudio.load(audio_path)
    inputs = audio_processor(waveform, sampling_rate=sample_rate, return_tensors="pt", padding=True)
    # Inference only: disable autograd bookkeeping.
    with torch.no_grad():
        logits = audio_model(inputs.input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = audio_processor.batch_decode(predicted_ids)
    return transcription[0]