Spaces:

bkbilal09
/

DocuVortex

Sleeping

App Files Files Community

DocuVortex / app.py

bkbilal09

Update app.py

8dfc363 verified 25 days ago

raw

history blame contribute delete

3.59 kB

	import os
	import requests
	import warnings
	import gradio as gr
	from groq import Groq
	from langchain_community.document_loaders import PyPDFLoader
	from langchain_text_splitters import RecursiveCharacterTextSplitter
	from langchain_huggingface import HuggingFaceEmbeddings
	from langchain_community.vectorstores import FAISS

	# Silent mode for technical logs
	warnings.filterwarnings("ignore")

	# --- 1. CONFIGURATION & SECRETS ---
	GROQ_API_KEY = os.environ.get("MY_GROQ_SECRET")
	# Initialize client safely
	if GROQ_API_KEY:
	client = Groq(api_key=GROQ_API_KEY)
	else:
	client = None

	# HIDDEN DATA SOURCE
	GDRIVE_LINKS = [
	"https://drive.google.com/file/d/10D3uJqBYG9gMWsNHcpTW4I6BKmA2otfH/view?usp=sharing"
	]

	# --- 2. KNOWLEDGE BASE INITIALIZATION ---
	def download_gdrive_pdf(url, output_path):
	try:
	file_id = url.split('/')[-2]
	download_url = f'https://drive.google.com/uc?export=download&id={file_id}'
	response = requests.get(download_url)
	if response.status_code == 200:
	with open(output_path, 'wb') as f:
	f.write(response.content)
	return True
	except:
	return False
	return False

	print("Starting document indexing...")
	all_chunks = []
	text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)

	for i, link in enumerate(GDRIVE_LINKS):
	filename = f"temp_doc_{i}.pdf"
	if download_gdrive_pdf(link, filename):
	try:
	loader = PyPDFLoader(filename)
	all_chunks.extend(text_splitter.split_documents(loader.load()))
	finally:
	if os.path.exists(filename):
	os.remove(filename)

	embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
	vector_db = FAISS.from_documents(all_chunks, embeddings)
	print("Index complete.")

	# --- 3. RAG LOGIC ---
	def respond(message, history):
	if not client:
	return "Error: MY_GROQ_SECRET not found in Space settings."

	# Retrieve relevant data
	docs = vector_db.similarity_search(message, k=5)
	context = "\n\n".join([doc.page_content for doc in docs])

	system_prompt = f"""
	You are Bilal's Research Assistant.
	1. Answer ONLY using the context provided below.
	2. If the answer is NOT in the context, say: "Answer not found in provided documents."
	3. Keep responses concise and factual.

	CONTEXT:
	{context}
	"""

	chat_completion = client.chat.completions.create(
	messages=[
	{"role": "system", "content": system_prompt},
	{"role": "user", "content": message}
	],
	model="llama-3.3-70b-versatile",
	temperature=0.1,
	)
	return chat_completion.choices[0].message.content

	# --- 4. UI DESIGN ---
	custom_css = """
	body { background-color: #0f172a; }
	.gradio-container { max-width: 800px !important; margin: auto; padding-top: 20px; }
	#title { text-align: center; color: #38bdf8; font-weight: 800; font-size: 2em; }
	#subtitle { text-align: center; color: #94a3b8; margin-bottom: 20px; }
	footer { display: none !important; }
	"""

	with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), css=custom_css) as demo:
	gr.HTML("<h1 id='title'>🌀 DocuVortex</h1>")
	gr.HTML("<p id='subtitle'>Bilal's Research AI: Strict Knowledge Retrieval</p>")

	# Simple Interface to avoid version-specific keyword crashes
	gr.ChatInterface(
	fn=respond,
	chatbot=gr.Chatbot(height=500),
	textbox=gr.Textbox(placeholder="Ask Bilal's AI a question...", container=False, scale=7),
	)

	if __name__ == "__main__":
	demo.launch()