# DIVERSIFAIR / app_new.py
# (last update by courtneyf2, commit 6aa4f74)
import os
import pandas as pd
from dotenv import load_dotenv
import gradio as gr
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from rag_query import ask_question_with_llm
load_dotenv()
class EnhancedRAGSystem:
    """RAG question-answering backend for the Gradio app.

    Loads a metadata CSV, a sentence-transformers embedding model, and a
    local FAISS index. If any of those are unavailable the system flips
    into ``demo_mode`` and answers every query with a diagnostic message
    instead of raising, so the UI still comes up.
    """

    def __init__(self):
        # Populated by initialize_system(); remain None/False on failure.
        self.vectorstore = None
        self.embedding_model = None
        self.metadata_df = None
        self.demo_mode = False
        self.initialize_system()

    def initialize_system(self):
        """Load metadata, the embedding model, and the FAISS vectorstore.

        Every failure path sets ``demo_mode = True`` and returns rather
        than raising, so the caller (the Gradio app) never crashes at
        startup.
        """
        try:
            print("Initialising RAG System...")
            if os.path.exists("metadata.csv"):
                self.metadata_df = pd.read_csv("metadata.csv")
                print(f"Loaded metadata for {len(self.metadata_df)} documents")
            else:
                print("ERROR: metadata.csv not found")
                self.demo_mode = True
                return
            openai_api_key = os.getenv("OPENAI_API_KEY")
            if not openai_api_key:
                print("ERROR: OPENAI_API_KEY not found")
                self.demo_mode = True
                return
            print("Loading embedding model...")
            self.embedding_model = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-mpnet-base-v2",
                model_kwargs={"device": "cpu"},
                encode_kwargs={"normalize_embeddings": True},
            )
            print("Embedding model loaded")
            # Load vectorstore (FAISS index files live next to this script).
            vectorstore_path = "."
            if not os.path.exists(vectorstore_path):
                print(f"ERROR: {vectorstore_path} directory not found")
                self.demo_mode = True
                return
            print("Loading vectorstore...")
            # allow_dangerous_deserialization: FAISS.load_local unpickles the
            # docstore; acceptable here because the index ships with the app.
            self.vectorstore = FAISS.load_local(
                vectorstore_path,
                self.embedding_model,
                allow_dangerous_deserialization=True,
            )
            print(
                f"Vectorstore loaded with {self.vectorstore.index.ntotal} documents"
            )
            print("System initialised successfully!")
        except Exception as e:
            print(f"ERROR initialising system: {e}")
            import traceback

            traceback.print_exc()
            self.demo_mode = True

    def query(self, question: str):
        """Answer *question* with the LLM.

        Returns a ``(answer, citations_text)`` tuple. Empty questions and
        demo mode are short-circuited; any runtime error is caught and
        reported as the answer text.
        """
        if not question.strip():
            return "Please enter a question.", ""
        if self.demo_mode or not self.vectorstore:
            return self._demo_response(), self._demo_citations()
        try:
            print(f"\nQuery: {question}")
            result = ask_question_with_llm(
                vectorstore=self.vectorstore,
                question=question,
                metadata_df=self.metadata_df,
                entity=None,  # no jurisdiction filter; search all documents
                k=10,
                model_name="gpt-4o-mini",
            )
            citations_text = self._format_citations(result["sources"])
            print(f"Generated response with {len(result['sources'])} sources")
            return result["answer"], citations_text
        except Exception as e:
            # BUGFIX: the original had a second, identical — and therefore
            # unreachable — `except Exception` clause on this try; removed.
            print(f"ERROR: {str(e)}")
            import traceback

            traceback.print_exc()
            return f"Error processing query: {str(e)}", ""

    @staticmethod
    def _format_citations(sources):
        """Deduplicate *sources* by (citation, jurisdiction) and format them.

        Sources sharing a citation/entity pair are merged into one entry
        listing all their source numbers, preserving first-seen order.
        """
        seen_citations = {}
        citation_order = []
        for source in sources:
            citation = source["citation"]
            entity = source["entity"]
            key = f"{citation}|{entity}"
            if key not in seen_citations:
                seen_citations[key] = {
                    "citation": citation,
                    "entity": entity,
                    "source_numbers": [source["number"]],
                }
                citation_order.append(key)
            else:
                seen_citations[key]["source_numbers"].append(source["number"])
        citations_list = []
        for key in citation_order:
            group = seen_citations[key]
            source_nums = ", ".join(f"{n}" for n in group["source_numbers"])
            citations_list.append(
                f"[{source_nums}] {group['citation']}\n Jurisdiction: {group['entity']}"
            )
        return "\n\n".join(citations_list)

    def _demo_response(self):
        # Shown as the answer whenever initialisation failed.
        return """**Demo Mode**
The system is not fully initialized. Possible issues:
- Vectorstore files are missing
- metadata.csv file is missing
- OpenAI API key is not configured
Please check the logs for specific errors."""

    def _demo_citations(self):
        # Shown in the sources panel whenever initialisation failed.
        return "[Demo Mode] No citations available"
# Build the shared RAG backend at import time so the Gradio callbacks can use it.
_banner = "=" * 60
print(_banner)
print("Starting RAG System...")
print(_banner)
rag_system = EnhancedRAGSystem()
def process_query(message, history):
    """Handle one chat turn.

    Appends the (question, answer) pair to *history* and returns the
    updated history together with the citations text for the sources
    panel. Blank messages leave history untouched and clear the panel.
    """
    if not message.strip():
        return history, ""
    answer, citations = rag_system.query(message)
    history.append((message, answer))
    return history, citations
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
.gradio-container {
max-width: 1400px !important;
margin: 0 auto;
font-family: 'Inter', sans-serif !important;
}
.gradio-container h1 {
font-family: 'Inter', sans-serif !important;
font-weight: 700 !important;
font-size: 2.5rem !important;
color: #1a202c !important;
}
.message.user {
background: #e6f3ff !important;
color: #1a365d !important;
border: 1px solid #bee3f8 !important;
border-radius: 12px !important;
font-family: 'Inter', sans-serif !important;
padding: 12px 16px !important;
}
.message.bot {
background: #f7fafc !important;
color: #1a202c !important;
border: 1px solid #e2e8f0 !important;
border-radius: 12px !important;
font-family: 'Inter', sans-serif !important;
line-height: 1.6 !important;
padding: 12px 16px !important;
}
.gr-textbox textarea, .gr-textbox input {
font-family: 'Inter', sans-serif !important;
font-size: 14px !important;
border: 1px solid #d1d5db !important;
border-radius: 8px !important;
padding: 12px 16px !important;
}
.gr-button {
font-family: 'Inter', sans-serif !important;
font-weight: 500 !important;
border-radius: 8px !important;
padding: 10px 20px !important;
}
.gr-button.primary {
background: #3b82f6 !important;
color: white !important;
border: none !important;
}
.gr-button.secondary {
background: #f9fafb !important;
color: #374151 !important;
border: 1px solid #d1d5db !important;
}
"""
with gr.Blocks(
title="DiversiFAIR AI Regulations Chat Model",
theme=gr.themes.Soft(),
css=custom_css,
) as demo:
gr.Markdown(
"""
# DiversiFAIR AI Regulations Chat Model
Ask questions about AI regulations, data protection laws, and policy documents from around the world.
"""
)
with gr.Row():
with gr.Column(scale=2):
chatbot = gr.Chatbot(
label="Research Conversation",
height=500,
show_copy_button=True,
container=True,
)
with gr.Row():
msg = gr.Textbox(
label="Your Question",
placeholder="e.g., What does Article 5 of the AI Act prohibit?",
container=True,
scale=4,
)
submit_btn = gr.Button("Search", scale=1, variant="primary")
clear_btn = gr.Button("Clear Chat", variant="secondary")
with gr.Column(scale=1):
sources_box = gr.Textbox(
label="Sources & Citations",
lines=15,
interactive=False,
container=True,
placeholder="Sources and citations will appear here...",
)
gr.Markdown(
"""
### Example Questions:
**EU AI Act:**
- How does the EU AI Act define high-risk AI systems?
- What are the transparency requirements in the AI Act?
- What does Article 5 of the AI Act prohibit?
- Summarize Article 30 of the AI Act
- What is GDPR Article 6 about?
**GDPR & Privacy:**
- What are the key principles of GDPR?
- What consent requirements exist for personal data processing?
**Comparing Jurisdictions:**
- How do different countries regulate facial recognition?
- What are the global approaches to AI governance?
"""
)
submit_btn.click(
process_query, inputs=[msg, chatbot], outputs=[chatbot, sources_box]
).then(lambda: "", outputs=[msg])
msg.submit(
process_query, inputs=[msg, chatbot], outputs=[chatbot, sources_box]
).then(lambda: "", outputs=[msg])
clear_btn.click(lambda: ([], ""), outputs=[chatbot, sources_box])
gr.Markdown(
"""
---
**Legal Disclaimer:** This system provides information for research and educational purposes only.
Always consult official legal sources and qualified legal professionals for authoritative legal guidance.
**Built for academic research purposes**
"""
)
demo.launch(server_name="0.0.0.0", server_port=7860)