# NOTE: web-viewer header residue removed (Spaces status: Sleeping; file size: 3,191 bytes).
import os
import requests
import gradio as gr
from groq import Groq
# LangChain components
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
# --- 1. SETUP & API KEYS ---
# Credential for the Groq client, read from the environment (None when unset).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

# Google Drive share links of the PDFs that feed the knowledge base.
GDRIVE_LINKS = [
    "https://drive.google.com/file/d/12bS7b-Q3qdbnwCRcTynXjMj1IyKFzBJl/view?usp=sharing",
]
def download_gdrive_pdf(url, output_path):
    """Download a PDF shared via Google Drive and save it to ``output_path``.

    Args:
        url: Drive share link, either ``.../d/<file_id>/...`` or one carrying
            an ``id=<file_id>`` query parameter.
        output_path: Destination file path. It is only written when the
            response looks like a PDF.

    Returns:
        True when the file was downloaded and written, False otherwise.
        Failures are reported on stdout and never raised to the caller.
    """
    import re  # local import so the block does not depend on a file-level edit

    if not isinstance(url, str):
        return False

    # Accept both common share-link shapes instead of relying on the position
    # of the id between slashes.
    id_match = (
        re.search(r"/d/([A-Za-z0-9_-]+)", url)
        or re.search(r"[?&]id=([A-Za-z0-9_-]+)", url)
    )
    if id_match is None:
        print(f"Could not find a Google Drive file id in: {url!r}")
        return False
    file_id = id_match.group(1)
    download_url = f'https://drive.google.com/uc?export=download&id={file_id}'

    try:
        # A timeout keeps start-up from hanging forever on a stalled connection.
        response = requests.get(download_url, timeout=30)
    except requests.RequestException as exc:
        print(f"Download failed for {url!r}: {exc}")
        return False

    if response.status_code != 200:
        print(f"Download failed for {url!r}: HTTP {response.status_code}")
        return False

    # NOTE(review): Drive can answer 200 with an HTML page (e.g. a sign-in or
    # confirmation page) instead of the file; only keep bodies that carry a
    # PDF header near the start.
    if b"%PDF-" not in response.content[:1024]:
        print(f"Download failed for {url!r}: response is not a PDF")
        return False

    try:
        with open(output_path, 'wb') as f:
            f.write(response.content)
    except OSError as exc:
        print(f"Could not write {output_path!r}: {exc}")
        return False
    return True
# --- 2. KNOWLEDGE BASE INITIALIZATION ---
print("Initializing Knowledge Base...")
all_chunks = []
text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=150)
for i, link in enumerate(GDRIVE_LINKS):
    filename = f"doc_{i}.pdf"
    if not download_gdrive_pdf(link, filename):
        # Make skipped sources visible instead of silently dropping them.
        print(f"Warning: could not download {link}; skipping.")
        continue
    loader = PyPDFLoader(filename)
    all_chunks.extend(text_splitter.split_documents(loader.load()))

# An empty chunk list cannot be turned into a useful index; stop here with an
# actionable message rather than failing later inside the vector store.
if not all_chunks:
    raise RuntimeError(
        "Knowledge base is empty: no document in GDRIVE_LINKS could be "
        "downloaded and parsed."
    )

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vector_db = FAISS.from_documents(all_chunks, embeddings)
print("System Ready.")
# --- 3. RAG LOGIC ---
def respond(message, history):
    """Answer a chat message using context retrieved from the vector store.

    The five stored chunks most similar to ``message`` are joined into one
    context block, embedded in the system prompt, and sent to the chat
    completion API together with the user message.

    Args:
        message: The user's question.
        history: Conversation history passed in by the chat UI; not used here.

    Returns:
        The content of the first completion choice.
    """
    retrieved = vector_db.similarity_search(message, k=5)
    context = "\n\n".join(chunk.page_content for chunk in retrieved)

    # The prompt lines are kept flush-left so the text sent to the model
    # carries no indentation.
    system_prompt = f"""
You are a professional Knowledge Assistant.
1. Answer the question using ONLY the provided context.
2. If the answer is not in the context, say: "I'm sorry, I couldn't find that in the documents."
3. Be concise and factual.
CONTEXT:
{context}
"""

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=conversation,
        temperature=0.1,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# --- 4. MODERN UI ---
# Stylesheet for the app: hides the footer, sets the page background and
# styles the element with id "chatbot-container".
custom_css = """
footer {visibility: hidden}
.gradio-container { background-color: #fcfcfc; }
#chatbot-container { border-radius: 12px; box-shadow: 0 2px 10px rgba(0,0,0,0.05); }
"""

# Theme and CSS are passed to launch() below rather than to gr.Blocks().
with gr.Blocks() as demo:
    gr.Markdown("# 📑 Intellect-Doc AI")
    gr.Markdown("Ask questions based on your specialized document library.")
    # type="messages" is not passed because it is now the default.
    chatbot = gr.ChatInterface(
        fn=respond,
        chatbot=gr.Chatbot(height=550, elem_id="chatbot-container"),
        textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
    )

if __name__ == "__main__":
    # Theme and CSS are applied here to fix the UserWarning raised when they
    # are given to gr.Blocks().
    demo.launch(theme=gr.themes.Soft(primary_hue="indigo"), css=custom_css)