import gradio as gr
from huggingface_hub import InferenceClient
from typing import List, Tuple
import fitz
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
# client = InferenceClient("meta-llama/Llama-2-7b-chat-hf")


class MyApp:
    """Application state: PDF page texts, their embeddings, and a FAISS index."""

    def __init__(self) -> None:
        self.documents: List[dict] = []
        self.embeddings = None
        self.index = None
        # Load the embedding model ONCE and reuse it for both indexing and
        # querying (the original re-loaded it on every search call).
        self.model = SentenceTransformer("all-MiniLM-L6-v2")
        self.load_pdf("Abhijith N_Resume.pdf")
        self.build_vector_db()

    def load_pdf(self, file_path: str) -> None:
        """Extract the text of each PDF page into self.documents.

        Each entry is {"page": 1-based page number, "content": page text}.
        """
        self.documents = []
        # Context manager guarantees the PDF file handle is closed
        # (the original leaked the open document).
        with fitz.open(file_path) as doc:
            for page_num, page in enumerate(doc, start=1):
                self.documents.append({"page": page_num, "content": page.get_text()})
        print("PDF processed successfully!")

    def build_vector_db(self) -> None:
        """Embed every page and build a flat L2 FAISS index over them."""
        self.embeddings = self.model.encode(
            [doc["content"] for doc in self.documents], show_progress_bar=True
        )
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(np.array(self.embeddings))
        print("Vector database built successfully!")

    def search_documents(self, query: str, k: int = 3) -> List[str]:
        """Return the text of the k pages nearest to the query embedding."""
        query_embedding = self.model.encode([query], show_progress_bar=False)
        _, indices = self.index.search(np.array(query_embedding), k)
        results = [self.documents[i]["content"] for i in indices[0]]
        return results if results else ["No relevant documents found."]


app = MyApp()


def preprocess_response(response: str) -> str:
    """Polish whitespace/punctuation and prepend a friendly opener.

    The opener is skipped when the text already mentions CAPA complaints.
    """
    response = response.strip()
    response = response.replace("\n\n", "\n")
    response = response.replace(" ,", ",").replace(" .", ".")
    response = " ".join(response.split())
    # BUG FIX: the original tested the mixed-case literal "Capa Complaints"
    # against response.lower(), which can never match; compare lowercase.
    if not any(word in response.lower() for word in ["capa complaints"]):
        response = "I'm here to help. " + response
    return response


def shorten_response(response: str) -> str:
    """Ask the LLM to shorten and refine an already-generated response."""
    messages = [
        {"role": "system", "content": "Shorten and refine this response"},
        {"role": "user", "content": response},
    ]
    result = client.chat_completion(
        messages, max_tokens=512, temperature=0.2, top_p=0.9
    )
    return result.choices[0].message["content"].strip()


# Queries containing any of these trigger RAG retrieval (duplicates removed
# from the original list; membership semantics are unchanged).
_RAG_KEYWORDS = (
    "summary", "skills", "how", "what", "does", "experience", "technique",
    "information", "guide", "help", "how to", "tell me", "how many", "capa",
    "project", "company", "education", "llm", "tech", "which", "tech stack",
    "libraries", "frameworks", "projects",
)


def respond(message: str, history: List[Tuple[str, str]]):
    """Chat handler: build the message list, optionally retrieve PDF context,
    call the LLM, polish and shorten the answer, and update the history.

    Returns (updated history, "") — the empty string clears the textbox.
    """
    system_message = """You are a Q&A assistant named Vasuki. If anyone asks your name, remember to say your name is Vasuki. Please provide a detailed and thorough response to the following query. Ensure that the answer is clear, concise, and includes examples where appropriate. For all other inquiries, your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document. 
If the user asks for a summary of the attached document, provide a detailed summary of the uploaded document."""
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # RAG — retrieve relevant pages when the query looks informational.
    if any(keyword in message.lower() for keyword in _RAG_KEYWORDS):
        retrieved_docs = app.search_documents(message)
        context = "\n".join(retrieved_docs)
        if context.strip():
            messages.append(
                {"role": "system", "content": "Relevant documents: " + context}
            )

    response = client.chat_completion(
        messages, max_tokens=1024, temperature=0.7, top_p=0.9
    )
    response_content = "".join(
        choice.message["content"]
        for choice in response.choices
        if "content" in choice.message
    )
    polished_response = preprocess_response(response_content)
    shortened_response = shorten_response(polished_response)
    history.append((message, shortened_response))
    return history, ""


with gr.Blocks() as demo:
    gr.Markdown("# Vasuki")
    gr.Markdown(
        "✨Greetings! I'm Vasuki, your AI tool for communicating with your pdf files✨ "
    )
    chatbot = gr.Chatbot()
    with gr.Row():
        txt_input = gr.Textbox(
            show_label=False, placeholder="Type your message here...", lines=1
        )
        submit_btn = gr.Button("Submit", scale=1)
        refresh_btn = gr.Button("Refresh Chat", scale=1, variant="secondary")
    example_questions = [
        ["Tell me summary of the file"],
        ["Can you guide me through a project?"],
        ["How do I understand the experience of the project?"],
        ["What are the skills that are present?"],
    ]
    gr.Examples(examples=example_questions, inputs=[txt_input])
    submit_btn.click(
        fn=respond, inputs=[txt_input, chatbot], outputs=[chatbot, txt_input]
    )
    refresh_btn.click(lambda: [], None, chatbot)

if __name__ == "__main__":
    demo.launch()