Spaces:
Sleeping
Sleeping
import os
import pandas as pd
from dotenv import load_dotenv
import gradio as gr
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
# Local project module: retrieval + LLM answering (not on PyPI).
from rag_query import ask_question_with_llm

# Pull OPENAI_API_KEY (and any other settings) from a local .env file, if present.
load_dotenv()
class EnhancedRAGSystem:
    """Wraps a FAISS vectorstore plus a metadata CSV behind a simple query API.

    If any required resource is missing (metadata.csv, the OPENAI_API_KEY
    environment variable, or the FAISS index files), the system falls back to
    "demo mode": the UI still loads, but queries get a canned explanation
    instead of a real answer.
    """

    def __init__(self):
        # Heavy resources are loaded eagerly here so the first user query
        # does not pay the embedding-model / index load cost.
        self.vectorstore = None
        self.embedding_model = None
        self.metadata_df = None
        self.demo_mode = False
        self.initialize_system()

    def initialize_system(self):
        """Load metadata, the embedding model, and the FAISS index.

        Never raises: any failure is logged and flips ``demo_mode`` on so the
        Gradio app can still start.
        """
        try:
            print("Initialising RAG System...")

            # Metadata CSV maps retrieved chunks back to human-readable citations.
            if os.path.exists("metadata.csv"):
                self.metadata_df = pd.read_csv("metadata.csv")
                print(f"Loaded metadata for {len(self.metadata_df)} documents")
            else:
                print("ERROR: metadata.csv not found")
                self.demo_mode = True
                return

            # The key itself is consumed downstream by ask_question_with_llm;
            # we only check for its presence up front.
            openai_api_key = os.getenv("OPENAI_API_KEY")
            if not openai_api_key:
                print("ERROR: OPENAI_API_KEY not found")
                self.demo_mode = True
                return

            print("Loading embedding model...")
            self.embedding_model = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-mpnet-base-v2",
                model_kwargs={"device": "cpu"},
                # Normalized embeddings so inner-product search behaves like
                # cosine similarity.
                encode_kwargs={"normalize_embeddings": True},
            )
            print("Embedding model loaded")

            # Load vectorstore (index files are expected next to this script).
            vectorstore_path = "."
            if not os.path.exists(vectorstore_path):
                print(f"ERROR: {vectorstore_path} directory not found")
                self.demo_mode = True
                return

            print("Loading vectorstore...")
            # allow_dangerous_deserialization is acceptable here because the
            # index is our own build artifact, not untrusted user input.
            self.vectorstore = FAISS.load_local(
                vectorstore_path,
                self.embedding_model,
                allow_dangerous_deserialization=True,
            )
            print(
                f"Vectorstore loaded with {self.vectorstore.index.ntotal} documents"
            )
            print("System initialised successfully!")
        except Exception as e:
            print(f"ERROR initialising system: {e}")
            import traceback

            traceback.print_exc()
            self.demo_mode = True

    def query(self, question: str):
        """Answer ``question`` with the LLM.

        Returns a ``(answer_text, citations_text)`` tuple. In demo mode (or
        when the vectorstore failed to load) canned placeholder text is
        returned instead.
        """
        if not question.strip():
            return "Please enter a question.", ""

        if self.demo_mode or not self.vectorstore:
            return self._demo_response(), self._demo_citations()

        try:
            print(f"\nQuery: {question}")
            result = ask_question_with_llm(
                vectorstore=self.vectorstore,
                question=question,
                metadata_df=self.metadata_df,
                entity=None,
                k=10,
                model_name="gpt-4o-mini",
            )
            response = result["answer"]

            # Group sources by (citation, entity) to deduplicate repeated
            # chunks of the same document, preserving first-seen order.
            seen_citations = {}
            citation_order = []
            for source in result["sources"]:
                citation = source["citation"]
                entity = source["entity"]
                key = f"{citation}|{entity}"
                if key not in seen_citations:
                    seen_citations[key] = {
                        "citation": citation,
                        "entity": entity,
                        "source_numbers": [source["number"]],
                    }
                    citation_order.append(key)
                else:
                    seen_citations[key]["source_numbers"].append(source["number"])

            # Format deduplicated citations, e.g. "[1, 3] Some Act\n Jurisdiction: EU".
            citations_list = []
            for key in citation_order:
                group = seen_citations[key]
                source_nums = ", ".join(str(n) for n in group["source_numbers"])
                citations_list.append(
                    f"[{source_nums}] {group['citation']}\n Jurisdiction: {group['entity']}"
                )
            citations_text = "\n\n".join(citations_list)

            print(f"Generated response with {len(result['sources'])} sources")
            return response, citations_text
        # NOTE: the original had a second, identical (and unreachable)
        # `except Exception` clause here; it has been removed.
        except Exception as e:
            print(f"ERROR: {str(e)}")
            import traceback

            traceback.print_exc()
            return f"Error processing query: {str(e)}", ""

    def _demo_response(self):
        """Canned answer shown when the system failed to initialize."""
        return """**Demo Mode**
The system is not fully initialized. Possible issues:
- Vectorstore files are missing
- metadata.csv file is missing
- OpenAI API key is not configured
Please check the logs for specific errors."""

    def _demo_citations(self):
        """Canned citations panel text for demo mode."""
        return "[Demo Mode] No citations available"
# Initialize system
print("=" * 60)
print("Starting RAG System...")
print("=" * 60)
# Module-level singleton built once at import time so every Gradio callback
# reuses the already-loaded index instead of re-initialising per request.
rag_system = EnhancedRAGSystem()
def process_query(message, history):
    """Run one chat turn: append the (question, answer) pair to the history
    and return the citations text for the sources panel."""
    cleaned = message.strip()
    if not cleaned:
        # Nothing to ask; leave the transcript and citations panel untouched.
        return history, ""
    answer, citations = rag_system.query(message)
    history.append((message, answer))
    return history, citations
# Runtime CSS injected into the Gradio page: Inter font everywhere, a capped
# page width, and light-themed styling for chat bubbles, inputs, and buttons.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
.gradio-container {
    max-width: 1400px !important;
    margin: 0 auto;
    font-family: 'Inter', sans-serif !important;
}
.gradio-container h1 {
    font-family: 'Inter', sans-serif !important;
    font-weight: 700 !important;
    font-size: 2.5rem !important;
    color: #1a202c !important;
}
.message.user {
    background: #e6f3ff !important;
    color: #1a365d !important;
    border: 1px solid #bee3f8 !important;
    border-radius: 12px !important;
    font-family: 'Inter', sans-serif !important;
    padding: 12px 16px !important;
}
.message.bot {
    background: #f7fafc !important;
    color: #1a202c !important;
    border: 1px solid #e2e8f0 !important;
    border-radius: 12px !important;
    font-family: 'Inter', sans-serif !important;
    line-height: 1.6 !important;
    padding: 12px 16px !important;
}
.gr-textbox textarea, .gr-textbox input {
    font-family: 'Inter', sans-serif !important;
    font-size: 14px !important;
    border: 1px solid #d1d5db !important;
    border-radius: 8px !important;
    padding: 12px 16px !important;
}
.gr-button {
    font-family: 'Inter', sans-serif !important;
    font-weight: 500 !important;
    border-radius: 8px !important;
    padding: 10px 20px !important;
}
.gr-button.primary {
    background: #3b82f6 !important;
    color: white !important;
    border: none !important;
}
.gr-button.secondary {
    background: #f9fafb !important;
    color: #374151 !important;
    border: 1px solid #d1d5db !important;
}
"""
# --- Gradio UI -------------------------------------------------------------
# Layout: wide chat column (left) + citations/examples column (right).
# NOTE(review): source indentation was lost in extraction; widget nesting
# below is reconstructed from context — confirm against the running app.
with gr.Blocks(
    title="DiversiFAIR AI Regulations Chat Model",
    theme=gr.themes.Soft(),
    css=custom_css,
) as demo:
    gr.Markdown(
        """
# DiversiFAIR AI Regulations Chat Model
Ask questions about AI regulations, data protection laws, and policy documents from around the world.
"""
    )
    with gr.Row():
        # Left column: conversation transcript plus the input controls.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                label="Research Conversation",
                height=500,
                show_copy_button=True,
                container=True,
            )
            with gr.Row():
                msg = gr.Textbox(
                    label="Your Question",
                    placeholder="e.g., What does Article 5 of the AI Act prohibit?",
                    container=True,
                    scale=4,
                )
                submit_btn = gr.Button("Search", scale=1, variant="primary")
            clear_btn = gr.Button("Clear Chat", variant="secondary")
        # Right column: read-only citations panel and example prompts.
        with gr.Column(scale=1):
            sources_box = gr.Textbox(
                label="Sources & Citations",
                lines=15,
                interactive=False,
                container=True,
                placeholder="Sources and citations will appear here...",
            )
            gr.Markdown(
                """
### Example Questions:
**EU AI Act:**
- How does the EU AI Act define high-risk AI systems?
- What are the transparency requirements in the AI Act?
- What does Article 5 of the AI Act prohibit?
- Summarize Article 30 of the AI Act
- What is GDPR Article 6 about?
**GDPR & Privacy:**
- What are the key principles of GDPR?
- What consent requirements exist for personal data processing?
**Comparing Jurisdictions:**
- How do different countries regulate facial recognition?
- What are the global approaches to AI governance?
"""
            )
    # Event wiring: both the Search click and pressing Enter in the textbox
    # run the query, then clear the input field via a chained .then() step.
    submit_btn.click(
        process_query, inputs=[msg, chatbot], outputs=[chatbot, sources_box]
    ).then(lambda: "", outputs=[msg])
    msg.submit(
        process_query, inputs=[msg, chatbot], outputs=[chatbot, sources_box]
    ).then(lambda: "", outputs=[msg])
    # Clear resets both the transcript and the citations panel in one call.
    clear_btn.click(lambda: ([], ""), outputs=[chatbot, sources_box])
    gr.Markdown(
        """
---
**Legal Disclaimer:** This system provides information for research and educational purposes only.
Always consult official legal sources and qualified legal professionals for authoritative legal guidance.
**Built for academic research purposes**
"""
    )
# Bind on all interfaces at port 7860 — the address/port HF Spaces expects.
demo.launch(server_name="0.0.0.0", server_port=7860)