DocuVortex / app.py
bkbilal09's picture
Update app.py
8dfc363 verified
import os
import requests
import warnings
import gradio as gr
from groq import Groq
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
# Suppress every Python warning so the application logs stay quiet.
warnings.simplefilter("ignore")

# --- 1. CONFIGURATION & SECRETS ---
# The Groq API key is read from the MY_GROQ_SECRET environment variable.
GROQ_API_KEY = os.environ.get("MY_GROQ_SECRET")

# Build the Groq client only when a key is available; otherwise keep None so
# the chat handler can report the missing secret instead of failing.
client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None

# Google Drive share links of the PDFs that get indexed at startup.
GDRIVE_LINKS = [
    "https://drive.google.com/file/d/10D3uJqBYG9gMWsNHcpTW4I6BKmA2otfH/view?usp=sharing",
]
# --- 2. KNOWLEDGE BASE INITIALIZATION ---
def _extract_gdrive_file_id(url):
    """Return the Google Drive file id embedded in *url*, or None if absent."""
    from urllib.parse import parse_qs, urlparse

    if not isinstance(url, str):
        return None
    parsed = urlparse(url)
    segments = parsed.path.split("/")
    # Share links look like .../file/d/<id>/view; the id follows the "d"
    # segment regardless of what (if anything) comes after it.
    if "d" in segments[:-1]:
        candidate = segments[segments.index("d") + 1]
        if candidate:
            return candidate
    # Other links carry the id as a query parameter (e.g. .../uc?id=<id>).
    ids = parse_qs(parsed.query).get("id")
    return ids[0] if ids else None


def download_gdrive_pdf(url, output_path, timeout=30):
    """Download the PDF behind a Google Drive share link to *output_path*.

    Args:
        url: Google Drive link containing the file id, either as a
            ``/d/<id>`` path segment or as an ``id=<id>`` query parameter.
        output_path: Local path the PDF bytes are written to.
        timeout: Seconds to wait for the server before giving up, passed to
            ``requests.get``.

    Returns:
        True when a PDF payload was received and written to *output_path*;
        False when the link has no file id, the request fails, the status is
        not 200, the payload is not a PDF, or the file cannot be written.
    """
    file_id = _extract_gdrive_file_id(url)
    if file_id is None:
        return False

    try:
        response = requests.get(
            "https://drive.google.com/uc",
            params={"export": "download", "id": file_id},
            timeout=timeout,
        )
        if response.status_code != 200:
            return False
        # A 200 response is not necessarily the file itself (it can be an
        # HTML page); only accept payloads that carry a PDF header so the
        # PDF loader is never handed a non-PDF file.
        if b"%PDF-" not in response.content[:1024]:
            return False
        with open(output_path, "wb") as f:
            f.write(response.content)
    except (requests.RequestException, OSError):
        # Network and filesystem failures are reported as "not downloaded";
        # anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        return False
    return True
# Build the in-memory vector index once, at import time: download every
# configured PDF, split it into overlapping chunks, embed the chunks and
# store them in FAISS.
print("Starting document indexing...")
all_chunks = []
text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
for i, link in enumerate(GDRIVE_LINKS):
    filename = f"temp_doc_{i}.pdf"
    if not download_gdrive_pdf(link, filename):
        # Report the failure by position only, so the source link itself is
        # never printed.
        print(f"Warning: document {i} could not be downloaded; skipping.")
        continue
    try:
        loader = PyPDFLoader(filename)
        all_chunks.extend(text_splitter.split_documents(loader.load()))
    finally:
        # The PDF is only needed while it is being parsed.
        if os.path.exists(filename):
            os.remove(filename)

# An index cannot be built from zero documents; fail with an explicit message
# instead of an opaque error from inside the vector store.
if not all_chunks:
    raise RuntimeError(
        "No document content could be indexed: every download failed or "
        "produced no text."
    )

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vector_db = FAISS.from_documents(all_chunks, embeddings)
print("Index complete.")
# --- 3. RAG LOGIC ---
def respond(message, history):
    """Answer *message* from the indexed documents via the Groq chat API.

    Args:
        message: The user's question.
        history: Earlier chat turns supplied by the chat UI; not used here.

    Returns:
        The model's reply text, or an error string when no Groq client was
        configured.
    """
    if not client:
        return "Error: MY_GROQ_SECRET not found in Space settings."

    # Fetch the five chunks most similar to the question and merge them into
    # a single context string, separated by blank lines.
    matches = vector_db.similarity_search(message, k=5)
    context = "\n\n".join(match.page_content for match in matches)

    system_prompt = f"""
You are Bilal's Research Assistant.
1. Answer ONLY using the context provided below.
2. If the answer is NOT in the context, say: "Answer not found in provided documents."
3. Keep responses concise and factual.
CONTEXT:
{context}
"""

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    reply = client.chat.completions.create(
        messages=conversation,
        model="llama-3.3-70b-versatile",
        temperature=0.1,
    )
    first_choice = reply.choices[0]
    return first_choice.message.content
# --- 4. UI DESIGN ---
# Stylesheet passed to the Gradio page: dark page background, a centred
# 800px-wide container, title/subtitle styling, and a hidden footer.
# The leading and trailing empty entries reproduce the newlines that open
# and close the stylesheet text.
custom_css = "\n".join(
    [
        "",
        "body { background-color: #0f172a; }",
        ".gradio-container { max-width: 800px !important; margin: auto; padding-top: 20px; }",
        "#title { text-align: center; color: #38bdf8; font-weight: 800; font-size: 2em; }",
        "#subtitle { text-align: center; color: #94a3b8; margin-bottom: 20px; }",
        "footer { display: none !important; }",
        "",
    ]
)
# Assemble the page: a title, a subtitle, and a chat interface wired to
# respond().
soft_blue_theme = gr.themes.Soft(primary_hue="blue")
with gr.Blocks(theme=soft_blue_theme, css=custom_css) as demo:
    gr.HTML("<h1 id='title'>🌀 DocuVortex</h1>")
    gr.HTML("<p id='subtitle'>Bilal's Research AI: Strict Knowledge Retrieval</p>")

    # Simple Interface to avoid version-specific keyword crashes
    chat_window = gr.Chatbot(height=500)
    question_box = gr.Textbox(
        placeholder="Ask Bilal's AI a question...",
        container=False,
        scale=7,
    )
    gr.ChatInterface(fn=respond, chatbot=chat_window, textbox=question_box)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()