File size: 5,948 Bytes
45a39db
 
 
36931af
45a39db
 
 
 
8dd5f26
 
45a39db
 
 
 
 
 
 
529038d
45a39db
 
5939e13
 
 
 
 
 
 
 
 
45a39db
945b699
45a39db
5939e13
945b699
45a39db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75733ef
45a39db
 
 
 
 
0ccb604
 
45a39db
0ccb604
45a39db
529038d
45a39db
 
 
 
 
 
 
 
 
 
 
83933c8
45a39db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
529038d
45a39db
529038d
36931af
45a39db
 
 
 
 
 
 
 
 
 
 
 
 
 
f1da86a
 
 
 
45a39db
 
 
 
 
 
 
 
5939e13
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import gradio as gr
from huggingface_hub import InferenceClient
from typing import List, Tuple
import fitz  
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

# Shared Hugging Face inference client; shorten_response() and respond() both
# send their chat-completion requests through it.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
# Alternative model, kept for reference:
#client = InferenceClient("meta-llama/Llama-2-7b-chat-hf")

# Application state: the text extracted from the PDF and the vector index built from it
class MyApp:
    """Holds the text of a PDF and a FAISS index over its pages.

    Attributes:
        documents: one ``{"page": int, "content": str}`` dict per PDF page,
            with 1-based page numbers.
        embeddings: float32 matrix with one row per entry in ``documents``,
            or ``None`` when nothing has been indexed.
        index: ``faiss.IndexFlatL2`` built from ``embeddings``, or ``None``.
    """

    # Sentence-transformers checkpoint used for both pages and queries; they
    # must share a model so their vectors live in the same space.
    EMBEDDING_MODEL = 'all-MiniLM-L6-v2'

    def __init__(self, pdf_path: str = "Abhijith N_Resume.pdf") -> None:
        """Load ``pdf_path`` and index its pages.

        Args:
            pdf_path: PDF to ingest. Defaults to the resume bundled with the
                app so that ``MyApp()`` keeps working unchanged.
        """
        self.documents = []
        self.embeddings = None
        self.index = None
        self._model = None  # created on first use by _get_model()
        self.load_pdf(pdf_path)
        self.build_vector_db()

    def _get_model(self):
        """Return the embedding model, loading it only once per instance."""
        # getattr keeps this safe for instances created without __init__.
        if getattr(self, "_model", None) is None:
            self._model = SentenceTransformer(self.EMBEDDING_MODEL)
        return self._model

    def load_pdf(self, file_path: str) -> None:
        """Extracts text from a PDF file and stores it in the app's documents.

        Replaces any previously loaded documents. The existing index is not
        rebuilt; call ``build_vector_db`` afterwards.
        """
        # The context manager closes the file handle even if extraction fails.
        with fitz.open(file_path) as doc:
            self.documents = [
                {"page": page_num, "content": page.get_text()}
                for page_num, page in enumerate(doc, start=1)
            ]
        print("PDF processed successfully!")

    def build_vector_db(self) -> None:
        """Builds a vector database using the content of the PDF.

        With no documents loaded the index is cleared instead of built, so
        ``search_documents`` falls back to its "not found" answer.
        """
        if not self.documents:
            self.embeddings = None
            self.index = None
            print("No documents to index; vector database not built.")
            return

        model = self._get_model()
        vectors = model.encode(
            [doc["content"] for doc in self.documents],
            show_progress_bar=True,
        )
        # FAISS only accepts float32 input.
        self.embeddings = np.asarray(vectors, dtype="float32")
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(self.embeddings)
        print("Vector database built successfully!")

    def search_documents(self, query: str, k: int = 3) -> List[str]:
        """Searches for relevant documents using vector similarity.

        Args:
            query: free-text question to embed and look up.
            k: maximum number of pages to return.

        Returns:
            Page texts of the nearest neighbours, closest first, or
            ``["No relevant documents found."]`` when there is nothing to
            return.
        """
        not_found = ["No relevant documents found."]
        if self.index is None or not self.documents or k <= 0:
            return not_found

        query_embedding = np.asarray(
            self._get_model().encode([query], show_progress_bar=False),
            dtype="float32",
        )
        _, neighbour_ids = self.index.search(query_embedding, k)
        # FAISS pads the result with -1 when fewer than k vectors exist;
        # without this filter -1 would index the last page by accident.
        results = [
            self.documents[i]["content"]
            for i in neighbour_ids[0]
            if 0 <= i < len(self.documents)
        ]
        return results if results else not_found

# Single shared instance; constructing it loads the PDF and builds the vector
# index, so this work happens once when the module is imported.
app = MyApp()

def preprocess_response(response: str) -> str:
    """Tidy a model response and add a friendly opener.

    All runs of whitespace (including newlines) collapse to single spaces,
    and a space left before a comma or full stop is removed. Unless the text
    mentions one of the exempt phrases, "I'm here to help. " is prepended.

    Args:
        response: raw text returned by the model.

    Returns:
        The cleaned single-line response.
    """
    # Phrases are lowercase because they are matched against lowercased text;
    # a mixed-case phrase here could never match.
    exempt_phrases = ("capa complaints",)

    # Collapse whitespace first so "word \n," also becomes "word,".
    cleaned = " ".join(response.split())
    cleaned = cleaned.replace(" ,", ",").replace(" .", ".")

    lowered = cleaned.lower()
    if not any(phrase in lowered for phrase in exempt_phrases):
        # rstrip avoids a dangling space when the response was empty.
        cleaned = ("I'm here to help. " + cleaned).rstrip()
    return cleaned

def shorten_response(response: str) -> str:
    """Ask the chat model behind ``client`` for a tighter version of ``response``.

    Sends the text as the user turn with a fixed system instruction and
    returns the first choice's content with surrounding whitespace removed.
    """
    system_turn = {"role": "system", "content": "Shorten and refine this response"}
    user_turn = {"role": "user", "content": response}
    completion = client.chat_completion(
        [system_turn, user_turn],
        max_tokens=512,
        temperature=0.2,
        top_p=0.9,
    )
    first_choice = completion.choices[0]
    return first_choice.message['content'].strip()
    
# Substrings that mark a message as needing document retrieval. Matching is a
# plain substring test on the lowercased message.
_RETRIEVAL_KEYWORDS = (
    "summary", "skills", "how", "what", "does", "experience", "technique",
    "information", "guide", "help", "how to", "tell me", "how many", "capa",
    "project", "company", "education", "llm", "tech", "which", "tech stack",
    "libraries", "frameworks", "projects",
)


def respond(message: str, history: List[Tuple[str, str]]):
    """Answer ``message`` and append the exchange to the chat history.

    Args:
        message: the text the user just submitted.
        history: previous ``(user, assistant)`` pairs; extended in place.

    Returns:
        ``(history, "")`` - the updated history for the chatbot and an empty
        string that clears the input box. A blank message returns the history
        unchanged without calling the model.
    """
    if history is None:
        history = []
    # Nothing to answer: skip both model calls and do not record an empty turn.
    if not message or not message.strip():
        return history, ""

    system_message = """You are a Q&A assistant named Vasuki. If anyone asks your name, remember to say your name is Vasuki. Please provide a detailed and thorough response to the following query. Ensure that the answer is clear, concise, and includes examples where appropriate. For all other inquiries, your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document. If the user asks for a summary of the attached document, provide a detailed summary of the uploaded document."""

    # RAG - retrieve matching pages when the message looks like a question
    # about the document.
    context = ""
    lowered = message.lower()
    if any(keyword in lowered for keyword in _RETRIEVAL_KEYWORDS):
        context = "\n".join(app.search_documents(message))

    # The context goes into the leading system prompt rather than a trailing
    # system turn: the conversation then ends on the user's message, which is
    # what chat templates expect (some reject a system turn anywhere but first).
    if context.strip():
        system_message = system_message + "\n\nRelevant documents: " + context

    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(messages, max_tokens=1024, temperature=0.7, top_p=0.9)
    response_content = "".join(
        choice.message['content']
        for choice in response.choices
        if 'content' in choice.message
    )

    polished_response = preprocess_response(response_content)
    shortened_response = shorten_response(polished_response)

    history.append((message, shortened_response))
    return history, ""

# Gradio UI: a chat window, a one-line input with Submit / Refresh buttons,
# and a few example questions that fill the input box.
with gr.Blocks() as demo:
    gr.Markdown("# Vasuki")
    gr.Markdown(
        "✨Greetings! I'm Vasuki, your AI tool for communicating with your pdf files✨ "
    )

    chatbot = gr.Chatbot()

    with gr.Row():
        txt_input = gr.Textbox(
            show_label=False,
            placeholder="Type your message here...",
            lines=1,
        )
        submit_btn = gr.Button("Submit", scale=1)
        refresh_btn = gr.Button("Refresh Chat", scale=1, variant="secondary")

    example_questions = [
        ["Tell me summary of the file"],
        ["Can you guide me through a project?"],
        ["How do I understand the experience of the project?"],
        ["What are the skills that are present?"],
    ]

    gr.Examples(examples=example_questions, inputs=[txt_input])

    # The Submit button and the Enter key both run respond(), which returns
    # the updated history and an empty string that clears the textbox.
    submit_btn.click(fn=respond, inputs=[txt_input, chatbot], outputs=[chatbot, txt_input])
    txt_input.submit(fn=respond, inputs=[txt_input, chatbot], outputs=[chatbot, txt_input])
    # Returning an empty list resets the chat window.
    refresh_btn.click(lambda: [], None, chatbot)

if __name__ == "__main__":
    demo.launch()