# DIVERSIFAIR / app_new.py
# (last update by courtneyf2, commit 6aa4f74)
import os
import pandas as pd
from dotenv import load_dotenv
import gradio as gr
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from rag_query import ask_question_with_llm
load_dotenv()
class EnhancedRAGSystem:
    """RAG question-answering backend for the Gradio app.

    Loads a metadata CSV, a sentence-transformers embedding model, and a
    local FAISS index. If any of those are unavailable the system flips
    into ``demo_mode`` and answers every query with a diagnostic message
    instead of raising, so the UI still comes up.
    """

    def __init__(self):
        # Populated by initialize_system(); remain None/False on failure.
        self.vectorstore = None
        self.embedding_model = None
        self.metadata_df = None
        self.demo_mode = False
        self.initialize_system()

    def initialize_system(self):
        """Load metadata, the embedding model, and the FAISS vectorstore.

        Every failure path sets ``demo_mode = True`` and returns rather
        than raising, so the caller (the Gradio app) never crashes at
        startup.
        """
        try:
            print("Initialising RAG System...")
            if os.path.exists("metadata.csv"):
                self.metadata_df = pd.read_csv("metadata.csv")
                print(f"Loaded metadata for {len(self.metadata_df)} documents")
            else:
                print("ERROR: metadata.csv not found")
                self.demo_mode = True
                return
            openai_api_key = os.getenv("OPENAI_API_KEY")
            if not openai_api_key:
                print("ERROR: OPENAI_API_KEY not found")
                self.demo_mode = True
                return
            print("Loading embedding model...")
            self.embedding_model = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-mpnet-base-v2",
                model_kwargs={"device": "cpu"},
                encode_kwargs={"normalize_embeddings": True},
            )
            print("Embedding model loaded")
            # Load vectorstore (FAISS index files live next to this script).
            vectorstore_path = "."
            if not os.path.exists(vectorstore_path):
                print(f"ERROR: {vectorstore_path} directory not found")
                self.demo_mode = True
                return
            print("Loading vectorstore...")
            # allow_dangerous_deserialization: FAISS.load_local unpickles the
            # docstore; acceptable here because the index ships with the app.
            self.vectorstore = FAISS.load_local(
                vectorstore_path,
                self.embedding_model,
                allow_dangerous_deserialization=True,
            )
            print(
                f"Vectorstore loaded with {self.vectorstore.index.ntotal} documents"
            )
            print("System initialised successfully!")
        except Exception as e:
            print(f"ERROR initialising system: {e}")
            import traceback

            traceback.print_exc()
            self.demo_mode = True

    def query(self, question: str):
        """Answer *question* with the LLM.

        Returns a ``(answer, citations_text)`` tuple. Empty questions and
        demo mode are short-circuited; any runtime error is caught and
        reported as the answer text.
        """
        if not question.strip():
            return "Please enter a question.", ""
        if self.demo_mode or not self.vectorstore:
            return self._demo_response(), self._demo_citations()
        try:
            print(f"\nQuery: {question}")
            result = ask_question_with_llm(
                vectorstore=self.vectorstore,
                question=question,
                metadata_df=self.metadata_df,
                entity=None,  # no jurisdiction filter; search all documents
                k=10,
                model_name="gpt-4o-mini",
            )
            citations_text = self._format_citations(result["sources"])
            print(f"Generated response with {len(result['sources'])} sources")
            return result["answer"], citations_text
        except Exception as e:
            # BUGFIX: the original had a second, identical — and therefore
            # unreachable — `except Exception` clause on this try; removed.
            print(f"ERROR: {str(e)}")
            import traceback

            traceback.print_exc()
            return f"Error processing query: {str(e)}", ""

    @staticmethod
    def _format_citations(sources):
        """Deduplicate *sources* by (citation, jurisdiction) and format them.

        Sources sharing a citation/entity pair are merged into one entry
        listing all their source numbers, preserving first-seen order.
        """
        seen_citations = {}
        citation_order = []
        for source in sources:
            citation = source["citation"]
            entity = source["entity"]
            key = f"{citation}|{entity}"
            if key not in seen_citations:
                seen_citations[key] = {
                    "citation": citation,
                    "entity": entity,
                    "source_numbers": [source["number"]],
                }
                citation_order.append(key)
            else:
                seen_citations[key]["source_numbers"].append(source["number"])
        citations_list = []
        for key in citation_order:
            group = seen_citations[key]
            source_nums = ", ".join(f"{n}" for n in group["source_numbers"])
            citations_list.append(
                f"[{source_nums}] {group['citation']}\n Jurisdiction: {group['entity']}"
            )
        return "\n\n".join(citations_list)

    def _demo_response(self):
        # Shown as the answer whenever initialisation failed.
        return """**Demo Mode**
The system is not fully initialized. Possible issues:
- Vectorstore files are missing
- metadata.csv file is missing
- OpenAI API key is not configured
Please check the logs for specific errors."""

    def _demo_citations(self):
        # Shown in the sources panel whenever initialisation failed.
        return "[Demo Mode] No citations available"
# Build the shared RAG backend at import time so the Gradio callbacks can use it.
_banner = "=" * 60
print(_banner)
print("Starting RAG System...")
print(_banner)
rag_system = EnhancedRAGSystem()
def process_query(message, history):
    """Handle one chat turn.

    Appends the (question, answer) pair to *history* and returns the
    updated history together with the citations text for the sources
    panel. Blank messages leave history untouched and clear the panel.
    """
    if not message.strip():
        return history, ""
    answer, citations = rag_system.query(message)
    history.append((message, answer))
    return history, citations
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
.gradio-container {
max-width: 1400px !important;
margin: 0 auto;
font-family: 'Inter', sans-serif !important;
}
.gradio-container h1 {
font-family: 'Inter', sans-serif !important;
font-weight: 700 !important;
font-size: 2.5rem !important;
color: #1a202c !important;
}
.message.user {
background: #e6f3ff !important;
color: #1a365d !important;
border: 1px solid #bee3f8 !important;
border-radius: 12px !important;
font-family: 'Inter', sans-serif !important;
padding: 12px 16px !important;
}
.message.bot {
background: #f7fafc !important;
color: #1a202c !important;
border: 1px solid #e2e8f0 !important;
border-radius: 12px !important;
font-family: 'Inter', sans-serif !important;
line-height: 1.6 !important;
padding: 12px 16px !important;
}
.gr-textbox textarea, .gr-textbox input {
font-family: 'Inter', sans-serif !important;
font-size: 14px !important;
border: 1px solid #d1d5db !important;
border-radius: 8px !important;
padding: 12px 16px !important;
}
.gr-button {
font-family: 'Inter', sans-serif !important;
font-weight: 500 !important;
border-radius: 8px !important;
padding: 10px 20px !important;
}
.gr-button.primary {
background: #3b82f6 !important;
color: white !important;
border: none !important;
}
.gr-button.secondary {
background: #f9fafb !important;
color: #374151 !important;
border: 1px solid #d1d5db !important;
}
"""
with gr.Blocks(
title="DiversiFAIR AI Regulations Chat Model",
theme=gr.themes.Soft(),
css=custom_css,
) as demo:
gr.Markdown(
"""
# DiversiFAIR AI Regulations Chat Model
Ask questions about AI regulations, data protection laws, and policy documents from around the world.
"""
)
with gr.Row():
with gr.Column(scale=2):
chatbot = gr.Chatbot(
label="Research Conversation",
height=500,
show_copy_button=True,
container=True,
)
with gr.Row():
msg = gr.Textbox(
label="Your Question",
placeholder="e.g., What does Article 5 of the AI Act prohibit?",
container=True,
scale=4,
)
submit_btn = gr.Button("Search", scale=1, variant="primary")
clear_btn = gr.Button("Clear Chat", variant="secondary")
with gr.Column(scale=1):
sources_box = gr.Textbox(
label="Sources & Citations",
lines=15,
interactive=False,
container=True,
placeholder="Sources and citations will appear here...",
)
gr.Markdown(
"""
### Example Questions:
**EU AI Act:**
- How does the EU AI Act define high-risk AI systems?
- What are the transparency requirements in the AI Act?
- What does Article 5 of the AI Act prohibit?
- Summarize Article 30 of the AI Act
- What is GDPR Article 6 about?
**GDPR & Privacy:**
- What are the key principles of GDPR?
- What consent requirements exist for personal data processing?
**Comparing Jurisdictions:**
- How do different countries regulate facial recognition?
- What are the global approaches to AI governance?
"""
)
submit_btn.click(
process_query, inputs=[msg, chatbot], outputs=[chatbot, sources_box]
).then(lambda: "", outputs=[msg])
msg.submit(
process_query, inputs=[msg, chatbot], outputs=[chatbot, sources_box]
).then(lambda: "", outputs=[msg])
clear_btn.click(lambda: ([], ""), outputs=[chatbot, sources_box])
gr.Markdown(
"""
---
**Legal Disclaimer:** This system provides information for research and educational purposes only.
Always consult official legal sources and qualified legal professionals for authoritative legal guidance.
**Built for academic research purposes**
"""
)
demo.launch(server_name="0.0.0.0", server_port=7860)