# Hugging Face Space application file (Space status at capture: Sleeping; file size: 3,590 bytes).
import os
import requests
import warnings
import gradio as gr
from groq import Groq
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
# Suppress every Python warning so technical noise stays out of the logs.
warnings.filterwarnings("ignore")

# --- 1. CONFIGURATION & SECRETS ---
# The Groq API key is read from the MY_GROQ_SECRET environment variable.
GROQ_API_KEY = os.environ.get("MY_GROQ_SECRET")

# Build the client only when a key is present; otherwise leave it as None so
# the request handler can report the missing secret instead of crashing.
client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None

# HIDDEN DATA SOURCE
# Google Drive share links of the PDFs that are indexed at startup.
GDRIVE_LINKS = [
    "https://drive.google.com/file/d/10D3uJqBYG9gMWsNHcpTW4I6BKmA2otfH/view?usp=sharing",
]
# --- 2. KNOWLEDGE BASE INITIALIZATION ---
def _extract_gdrive_file_id(url):
    """Return the file id from a ``.../d/<id>/...`` Drive link, or None."""
    # Strip the query string and fragment first so a suffix such as
    # "view?usp=sharing" can never be mistaken for a path segment.
    path = url.split("?", 1)[0].split("#", 1)[0]
    segments = path.split("/")
    if "d" in segments:
        position = segments.index("d") + 1
        if position < len(segments) and segments[position]:
            return segments[position]
    return None


def download_gdrive_pdf(url, output_path):
    """Download a shared Google Drive PDF and save it to ``output_path``.

    Args:
        url: Google Drive share link that contains a ``/d/<file_id>`` segment.
        output_path: Local path the downloaded bytes are written to.

    Returns:
        True when a PDF payload was fetched and written, False on any
        failure (unparseable link, network error, non-200 status, a response
        that is not a PDF, or a file-system error).
    """
    if not isinstance(url, str):
        return False

    file_id = _extract_gdrive_file_id(url)
    if not file_id:
        return False

    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    try:
        # Without a timeout a stalled connection would block startup forever.
        response = requests.get(download_url, timeout=30)
        if response.status_code != 200:
            return False

        payload = response.content
        # Drive can answer 200 with an HTML page (sign-in, virus-scan notice);
        # only accept bodies that carry a PDF header near the start.
        if b"%PDF" not in payload[:1024]:
            return False

        with open(output_path, "wb") as pdf_file:
            pdf_file.write(payload)
    except (requests.RequestException, OSError) as exc:
        # Log only the error type: the exception text may contain the
        # source URL, which this app treats as hidden.
        print(f"Download to {output_path} failed: {type(exc).__name__}")
        return False
    return True
# Build the in-memory vector index once at import time: download each PDF,
# split it into overlapping chunks, embed the chunks and store them in FAISS.
print("Starting document indexing...")
all_chunks = []
text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
for i, link in enumerate(GDRIVE_LINKS):
    filename = f"temp_doc_{i}.pdf"
    if not download_gdrive_pdf(link, filename):
        # Report by position only; the links themselves are kept out of logs.
        print(f"Skipping document {i}: download failed.")
        continue
    try:
        loader = PyPDFLoader(filename)
        all_chunks.extend(text_splitter.split_documents(loader.load()))
    finally:
        # The temporary PDF is removed whether or not parsing succeeded.
        if os.path.exists(filename):
            os.remove(filename)

# Building a FAISS index from zero documents fails with an opaque IndexError,
# so stop here with an actionable message (and before loading the model).
if not all_chunks:
    raise RuntimeError(
        "No document content could be indexed: every download failed or "
        "the PDFs contained no extractable text."
    )

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vector_db = FAISS.from_documents(all_chunks, embeddings)
print("Index complete.")
# --- 3. RAG LOGIC ---
def respond(message, history):
    """Answer a chat message using only text retrieved from the index.

    Args:
        message: The user's question.
        history: Previous chat turns supplied by the chat UI. It is accepted
            to match the callback signature but is not used, so every
            question is answered independently.

    Returns:
        The model's reply, or a short explanatory string when the question
        is empty, the API key is missing, or the model request fails.
    """
    # Skip retrieval and the LLM call entirely for blank submissions.
    if not str(message or "").strip():
        return "Please enter a question."

    if not client:
        return "Error: MY_GROQ_SECRET not found in Space settings."

    # Retrieve the five most similar chunks and join them into one context.
    docs = vector_db.similarity_search(message, k=5)
    context = "\n\n".join(doc.page_content for doc in docs)

    system_prompt = (
        "\n"
        "You are Bilal's Research Assistant.\n"
        "1. Answer ONLY using the context provided below.\n"
        '2. If the answer is NOT in the context, say: "Answer not found in provided documents."\n'
        "3. Keep responses concise and factual.\n"
        "CONTEXT:\n"
        f"{context}\n"
    )

    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ],
            model="llama-3.3-70b-versatile",
            temperature=0.1,
        )
    except Exception as exc:
        # UI boundary: log the failure and give the user a readable reply
        # instead of letting the exception escape the handler.
        print(f"Groq request failed: {type(exc).__name__}: {exc}")
        return "Error: the language model request failed. Please try again."

    return chat_completion.choices[0].message.content
# --- 4. UI DESIGN ---
# Page styling: dark background, centred 800px column, styled title and
# subtitle, and the default footer hidden.
custom_css = """
body { background-color: #0f172a; }
.gradio-container { max-width: 800px !important; margin: auto; padding-top: 20px; }
#title { text-align: center; color: #38bdf8; font-weight: 800; font-size: 2em; }
#subtitle { text-align: center; color: #94a3b8; margin-bottom: 20px; }
footer { display: none !important; }
"""

soft_blue_theme = gr.themes.Soft(primary_hue="blue")

with gr.Blocks(theme=soft_blue_theme, css=custom_css) as demo:
    # Header: title followed by the subtitle.
    gr.HTML("<h1 id='title'>🌀 DocuVortex</h1>")
    gr.HTML("<p id='subtitle'>Bilal's Research AI: Strict Knowledge Retrieval</p>")

    # Simple Interface to avoid version-specific keyword crashes.
    # The components are created in the same order as before (chatbot first,
    # then textbox) and handed to the chat interface wired to `respond`.
    chat_window = gr.Chatbot(height=500)
    question_box = gr.Textbox(
        placeholder="Ask Bilal's AI a question...",
        container=False,
        scale=7,
    )
    gr.ChatInterface(
        fn=respond,
        chatbot=chat_window,
        textbox=question_box,
    )

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()