File size: 3,191 Bytes
c025d72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e0c3ede
 
 
c025d72
 
e0c3ede
c025d72
 
 
e0c3ede
c025d72
 
 
e0c3ede
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os
import re

import gradio as gr
import requests
from groq import Groq

# LangChain components
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# --- 1. SETUP & API KEYS ---
# The Groq key is read from the environment; it is None when the variable
# is not set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

# Google Drive share links of the PDFs that make up the knowledge base.
GDRIVE_LINKS = [
    "https://drive.google.com/file/d/12bS7b-Q3qdbnwCRcTynXjMj1IyKFzBJl/view?usp=sharing",
]

# Share-link shapes that carry a Drive file ID:
#   .../file/d/<id>/view?...   and   ...?id=<id> / ...&id=<id>
_GDRIVE_ID_PATTERNS = (
    re.compile(r"/d/([A-Za-z0-9_-]+)"),
    re.compile(r"[?&]id=([A-Za-z0-9_-]+)"),
)


def _extract_gdrive_file_id(url):
    """Return the Drive file ID embedded in *url*, or None if there is none."""
    if not isinstance(url, str):
        return None
    for pattern in _GDRIVE_ID_PATTERNS:
        found = pattern.search(url)
        if found:
            return found.group(1)
    return None


def download_gdrive_pdf(url, output_path, timeout=30):
    """Download a PDF from a Google Drive share link to *output_path*.

    Args:
        url: A Drive share link containing either ``/d/<id>`` or ``id=<id>``.
        output_path: Destination file path; written in binary mode.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        True if a PDF was downloaded and written, False otherwise (no file
        ID in the link, network error, non-200 status, a response body that
        is not a PDF, or a failure writing the file).
    """
    file_id = _extract_gdrive_file_id(url)
    if file_id is None:
        return False

    download_url = f'https://drive.google.com/uc?export=download&id={file_id}'
    try:
        response = requests.get(download_url, timeout=timeout)
    except requests.RequestException:
        return False
    if response.status_code != 200:
        return False

    payload = response.content
    # A 200 response can still be an HTML page rather than the file itself;
    # only accept bodies that carry the PDF header near the start.
    if b"%PDF-" not in payload[:1024]:
        return False

    try:
        with open(output_path, 'wb') as f:
            f.write(payload)
    except OSError:
        return False
    return True

# --- 2. KNOWLEDGE BASE INITIALIZATION ---
# Runs once at import time: download every configured PDF, split it into
# overlapping chunks, and embed the chunks into a FAISS vector store.
print("Initializing Knowledge Base...")
all_chunks = []
text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=150)

for i, link in enumerate(GDRIVE_LINKS):
    filename = f"doc_{i}.pdf"
    if not download_gdrive_pdf(link, filename):
        # Best effort: one bad link should not stop the others, but report it
        # so an empty or partial knowledge base is not a mystery.
        print(f"Warning: could not download {link}; skipping.")
        continue
    loader = PyPDFLoader(filename)
    all_chunks.extend(text_splitter.split_documents(loader.load()))

# Building the index from zero documents fails with an unhelpful error, so
# stop here with a message that points at the actual cause.
if not all_chunks:
    raise RuntimeError(
        "Knowledge base is empty: no PDF could be downloaded or parsed "
        "from GDRIVE_LINKS."
    )

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vector_db = FAISS.from_documents(all_chunks, embeddings)
print("System Ready.")

# --- 3. RAG LOGIC ---
def respond(message, history):
    """Answer *message* using document chunks retrieved from ``vector_db``.

    The five chunks most similar to the message are joined into a context
    section of the system prompt, and the question is sent to the Groq chat
    completion API.

    Args:
        message: The user's question.
        history: Conversation history supplied by the chat UI; not used.

    Returns:
        The message content of the first completion choice.
    """
    retrieved = vector_db.similarity_search(message, k=5)
    context = "\n\n".join(chunk.page_content for chunk in retrieved)

    # Spelled out line by line so the exact whitespace sent to the model
    # (leading indentation, trailing spaces) is visible in the source.
    system_prompt = (
        "\n"
        "    You are a professional Knowledge Assistant. \n"
        "    1. Answer the question using ONLY the provided context.\n"
        "    2. If the answer is not in the context, say: \"I'm sorry, I couldn't find that in the documents.\"\n"
        "    3. Be concise and factual.\n"
        "    \n"
        "    CONTEXT:\n"
        f"    {context}\n"
        "    "
    )

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    completion = client.chat.completions.create(
        messages=conversation,
        model="llama-3.3-70b-versatile",
        temperature=0.1,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# --- 4. MODERN UI ---
# Stylesheet handed to Gradio: hides the footer, sets the page background,
# and rounds/shadows the element with id "chatbot-container".
custom_css = "\n".join([
    "",
    "footer {visibility: hidden}",
    ".gradio-container { background-color: #fcfcfc; }",
    "#chatbot-container { border-radius: 12px; box-shadow: 0 2px 10px rgba(0,0,0,0.05); }",
    "",
])

# Theme and CSS are not passed to gr.Blocks() here; they are supplied to
# launch() instead.
with gr.Blocks() as demo:
    gr.Markdown("# 📑 Intellect-Doc AI")
    gr.Markdown("Ask questions based on your specialized document library.")

    # type="messages" is left out; per the original author's note it is now
    # the default.
    chat_window = gr.Chatbot(height=550, elem_id="chatbot-container")
    question_box = gr.Textbox(
        placeholder="Ask me anything...",
        container=False,
        scale=7,
    )
    chatbot = gr.ChatInterface(
        fn=respond,
        chatbot=chat_window,
        textbox=question_box,
    )

if __name__ == "__main__":
    # Theme and CSS are given to launch() rather than gr.Blocks(); the
    # original author notes this avoids a UserWarning.
    app_theme = gr.themes.Soft(primary_hue="indigo")
    demo.launch(theme=app_theme, css=custom_css)