# Hugging Face Space: bkbilal09 / DocuVortex (status shown by the viewer: Sleeping)
# File size: 3,590 Bytes
# Commit hashes listed beside the file in the viewer: 01f6ace, 8dfc363, e4daca7, 80a2fa5

import os
import requests
import warnings
import gradio as gr
from groq import Groq
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Suppress every Python warning so the runtime output stays quiet.
warnings.filterwarnings("ignore")

# --- 1. CONFIGURATION & SECRETS ---
# The Groq key is read from the MY_GROQ_SECRET environment variable.
GROQ_API_KEY = os.environ.get("MY_GROQ_SECRET")
# An unset or empty key means no client is built; `client` is None in that case.
client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None

# HIDDEN DATA SOURCE
# Google Drive share links of the PDFs that get indexed.
GDRIVE_LINKS = [
    "https://drive.google.com/file/d/10D3uJqBYG9gMWsNHcpTW4I6BKmA2otfH/view?usp=sharing",
]

# --- 2. KNOWLEDGE BASE INITIALIZATION ---
def _extract_gdrive_file_id(url):
    """Return the Google Drive file ID embedded in *url*, or None if absent."""
    from urllib.parse import parse_qs, urlparse

    if not isinstance(url, str) or not url:
        return None

    try:
        parsed = urlparse(url)
    except ValueError:
        return None

    # Share links look like .../file/d/<id>[/view...]: take the segment after "d".
    segments = parsed.path.split('/')
    if 'd' in segments:
        position = segments.index('d') + 1
        if position < len(segments) and segments[position]:
            return segments[position]

    # Links of the form ...?id=<id> carry the ID in the query string.
    query_ids = parse_qs(parsed.query).get('id')
    if query_ids:
        return query_ids[0]

    # Legacy behaviour: fall back to the second-to-last '/'-separated piece.
    pieces = url.split('/')
    if len(pieces) >= 2 and pieces[-2]:
        return pieces[-2]
    return None


def download_gdrive_pdf(url, output_path):
    """Download the PDF behind a Google Drive share link to *output_path*.

    Args:
        url: Google Drive link containing the file ID, e.g.
            ``https://drive.google.com/file/d/<id>/view?usp=sharing``.
        output_path: Destination path; written in binary mode.

    Returns:
        True when a PDF payload was fetched and written, False when the link
        has no usable file ID, the request fails or returns a non-200 status,
        the payload is not a PDF, or the file cannot be written.
    """
    file_id = _extract_gdrive_file_id(url)
    if not file_id:
        return False

    try:
        # A timeout keeps a stalled connection from blocking startup forever.
        response = requests.get(
            'https://drive.google.com/uc',
            params={'export': 'download', 'id': file_id},
            timeout=30,
        )
        if response.status_code != 200:
            return False

        payload = response.content
        # Drive can answer 200 with an HTML page (e.g. sign-in or download
        # confirmation) instead of the file; only accept data that carries a
        # PDF header near its start.
        if b'%PDF' not in payload[:1024]:
            return False

        with open(output_path, 'wb') as f:
            f.write(payload)
        return True
    except (requests.RequestException, OSError):
        # Network and filesystem failures are reported as "not downloaded".
        return False

# Build the vector index once, when the module is loaded.
print("Starting document indexing...")
all_chunks = []
text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)

for i, link in enumerate(GDRIVE_LINKS):
    filename = f"temp_doc_{i}.pdf"
    if not download_gdrive_pdf(link, filename):
        # Report the skipped source by position only; the link itself is
        # deliberately kept out of the logs.
        print(f"Warning: could not download document {i}; skipping it.")
        continue
    try:
        loader = PyPDFLoader(filename)
        all_chunks.extend(text_splitter.split_documents(loader.load()))
    finally:
        # The PDF is only needed while it is parsed; always drop the temp copy.
        if os.path.exists(filename):
            os.remove(filename)

# Building an index from zero chunks would otherwise fail inside the vector
# store with an unhelpful low-level error; fail here with a clear message.
if not all_chunks:
    raise RuntimeError(
        "No document text could be indexed: every download failed or the "
        "PDFs contained no extractable text."
    )

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vector_db = FAISS.from_documents(all_chunks, embeddings)
print("Index complete.")

# --- 3. RAG LOGIC ---
def respond(message, history):
    """Answer *message* using only passages retrieved from the document index.

    Args:
        message: The user's question (assumed to be a plain string, as the
            chat UI supplies it).
        history: Earlier chat turns passed in by the chat UI. Unused: each
            question is answered on its own from the retrieved context.

    Returns:
        The model's reply text, or a short explanatory string when the API
        key is missing, the question is blank, or the Groq request fails.
    """
    # Same package that provides Groq; needed here only for its error types.
    import groq

    if not client:
        return "Error: MY_GROQ_SECRET not found in Space settings."

    # A blank question has nothing to retrieve; skip the search and API call.
    if not message or not message.strip():
        return "Please enter a question."

    # Retrieve the five most similar chunks and merge them into one context.
    docs = vector_db.similarity_search(message, k=5)
    context = "\n\n".join(doc.page_content for doc in docs)

    system_prompt = f"""
    You are Bilal's Research Assistant. 
    1. Answer ONLY using the context provided below.
    2. If the answer is NOT in the context, say: "Answer not found in provided documents."
    3. Keep responses concise and factual.
    
    CONTEXT:
    {context}
    """

    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ],
            model="llama-3.3-70b-versatile",
            # Low temperature keeps answers close to the retrieved text.
            temperature=0.1,
        )
    except groq.APIError as err:
        # Log the details, but show the user a message in the same
        # "Error: ..." style used for the missing-key case.
        print(f"Groq request failed: {err}")
        return "Error: the language model request failed. Please try again later."

    return chat_completion.choices[0].message.content

# --- 4. UI DESIGN ---
# Stylesheet for the page. Adjacent literals are concatenated into a single
# string, one CSS rule per line.
custom_css = (
    "\n"
    # Page background.
    "body { background-color: #0f172a; }\n"
    # Centred column capped at 800px.
    ".gradio-container { max-width: 800px !important; margin: auto; padding-top: 20px; }\n"
    # Heading and tagline.
    "#title { text-align: center; color: #38bdf8; font-weight: 800; font-size: 2em; }\n"
    "#subtitle { text-align: center; color: #94a3b8; margin-bottom: 20px; }\n"
    # Hide the page footer.
    "footer { display: none !important; }\n"
)

# Page layout: heading, tagline, then the chat widget driven by respond().
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="blue"),
    css=custom_css,
) as demo:
    gr.HTML("<h1 id='title'>🌀 DocuVortex</h1>")
    gr.HTML("<p id='subtitle'>Bilal's Research AI: Strict Knowledge Retrieval</p>")

    # Only basic ChatInterface arguments are used, to avoid keyword
    # differences between Gradio versions.
    chat_history_panel = gr.Chatbot(height=500)
    question_box = gr.Textbox(
        placeholder="Ask Bilal's AI a question...",
        container=False,
        scale=7,
    )
    gr.ChatInterface(
        fn=respond,
        chatbot=chat_history_panel,
        textbox=question_box,
    )

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()