"""Gradio explorer for books indexed with nano-graphrag.

The app discovers GraphRAG working directories in the current working
directory, answers questions about the active book (optionally through the
external Borges API), and lets users upload new books either as raw text or
as an archive of pre-built GraphRAG data.
"""

import gradio as gr
import json
import re
import os
import asyncio
import functools
from pathlib import Path
from typing import Dict, Any, List
import tempfile
import shutil
import zipfile
import requests

# Install and import nano_graphrag from local source
import subprocess
import sys


def install_nano_graphrag():
    """Put the local ``nano-graphrag`` checkout on ``sys.path``.

    Nothing is actually installed: the ``nano-graphrag`` directory under the
    current working directory is simply made importable.

    Returns:
        True when the path was added (or was already present), False if
        manipulating the path raised.
    """
    try:
        # Add nano-graphrag directory to Python path
        nano_dir = os.path.join(os.getcwd(), "nano-graphrag")
        if nano_dir not in sys.path:
            sys.path.insert(0, nano_dir)
        print("✅ nano-graphrag added to Python path")
        return True
    except Exception as e:
        print(f"⚠️ Failed to add nano-graphrag to path: {e}")
        return False


# Try to import nano_graphrag; fall back to the local checkout if needed.
try:
    from nano_graphrag import GraphRAG, QueryParam
    from nano_graphrag._llm import gpt_4o_mini_complete
    NANO_GRAPHRAG_AVAILABLE = True
    print("✅ nano-graphrag imported successfully")
except ImportError as e:
    print(f"⚠️ nano-graphrag not available, trying to install: {e}")
    if install_nano_graphrag():
        try:
            from nano_graphrag import GraphRAG, QueryParam
            from nano_graphrag._llm import gpt_4o_mini_complete
            NANO_GRAPHRAG_AVAILABLE = True
            print("✅ nano-graphrag installed and imported successfully")
        except ImportError as e2:
            NANO_GRAPHRAG_AVAILABLE = False
            print(f"⚠️ Still failed to import after installation: {e2}")
    else:
        NANO_GRAPHRAG_AVAILABLE = False
        print("⚠️ nano-graphrag installation failed, running in demo mode")

# External API configuration
BORGES_API_URL = os.getenv("BORGES_API_URL", "https://borges-library.vercel.app/api/graphrag")
ENABLE_EXTERNAL_API = os.getenv("ENABLE_EXTERNAL_API", "false").lower() == "true"


class BorgesGraphRAG:
    """Registry of GraphRAG instances, keyed by their working directory."""

    def __init__(self):
        # Maps a working directory to the GraphRAG instance built on it.
        self.instances = {}
        # Working directory of the book that queries currently run against.
        self.current_book = None

    def load_book_data(self, book_folder: str) -> bool:
        """Make ``book_folder`` the current book, creating its instance if needed.

        Args:
            book_folder: GraphRAG working directory of the book.

        Returns:
            True when the book is now current; False when nano-graphrag is
            unavailable or creating the GraphRAG instance raised.
        """
        if not NANO_GRAPHRAG_AVAILABLE:
            return False

        try:
            if book_folder not in self.instances:
                self.instances[book_folder] = GraphRAG(
                    working_dir=book_folder,
                    best_model_func=gpt_4o_mini_complete,
                    cheap_model_func=gpt_4o_mini_complete,
                    best_model_max_async=3,
                    cheap_model_max_async=3
                )
            self.current_book = book_folder
            return True
        except Exception as e:
            print(f"Error loading book data: {e}")
            return False

    @staticmethod
    def _section_rows(context_str: str, section: str) -> List[List[str]]:
        """Return the data rows of one fenced CSV section of a context string.

        The header row and blank lines are skipped, every cell is stripped,
        and rows with fewer than four cells are dropped.
        """
        match = re.search(
            rf'-----{section}-----\n```csv\n(.*?)\n```', context_str, re.DOTALL
        )
        if not match:
            return []

        rows = []
        for line in match.group(1).strip().split('\n')[1:]:  # Skip header
            if not line.strip():
                continue
            parts = [p.strip() for p in line.split(',')]
            if len(parts) >= 4:
                rows.append(parts)
        return rows

    def parse_context_csv(self, context_str: str):
        """Parse the CSV context returned by GraphRAG.

        Args:
            context_str: Context text containing ``-----Entities-----`` and
                ``-----Relationships-----`` sections, each a fenced CSV block.

        Returns:
            A ``(entities, relations)`` tuple of lists of dicts. Everything
            from the fourth column onwards is re-joined with commas into the
            description, because descriptions may themselves contain commas.
        """
        # Entities section (format: id,entity,type,description)
        entities = [
            {
                'id': parts[1],  # entity name
                'type': parts[2],  # entity type
                'description': ','.join(parts[3:]),  # may contain commas
                'rank': 1.0  # default rank
            }
            for parts in self._section_rows(context_str, 'Entities')
        ]

        # Relationships section (format: id,source,target,description)
        relations = [
            {
                'source': parts[1],  # source entity
                'target': parts[2],  # target entity
                'description': ','.join(parts[3:]),  # may contain commas
                'weight': 1.0,  # default weight
                'rank': 1.0  # default rank
            }
            for parts in self._section_rows(context_str, 'Relationships')
        ]

        return entities, relations

    async def query_external_api(self, query: str, mode: str = "local") -> Dict[str, Any]:
        """Query the external Borges API.

        Args:
            query: Natural-language question.
            mode: Search mode forwarded to the API.

        Returns:
            The decoded JSON body on HTTP 200; otherwise a dict with
            ``success`` False and an ``error`` message. Never raises.
        """
        payload = {
            "query": query,
            "mode": mode
        }
        try:
            # requests is blocking: run it in the default executor so the
            # event loop is not stalled for up to the 30 s timeout.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                functools.partial(
                    requests.post,
                    f"{BORGES_API_URL}/search",
                    json=payload,
                    timeout=30
                )
            )

            if response.status_code == 200:
                return response.json()
            return {
                "success": False,
                "error": f"API error: {response.status_code}",
                "query": query,
                "mode": mode
            }
        except Exception as e:
            return {
                "success": False,
                "error": f"Connection error: {str(e)}",
                "query": query,
                "mode": mode
            }

    def _ensure_book_loaded(self):
        """Make sure a GraphRAG instance exists for the current book.

        Returns:
            None when a usable instance is available, otherwise a message
            describing why local querying is impossible.
        """
        if not NANO_GRAPHRAG_AVAILABLE:
            return "nano-graphrag is not available"

        if not self.current_book and available_books:
            print(f"🔄 No current book, loading first available: {available_books[0]}")
            self.load_book_data(available_books[0])

        if not self.current_book:
            return "no book is loaded"

        # current_book may have been set without a successful load (startup
        # forces it after failed attempts), so retry the load lazily here.
        if self.current_book not in self.instances:
            if not self.load_book_data(self.current_book):
                return f"could not load book '{self.current_book}'"
        return None

    async def query_book(self, query: str, mode: str = "local", use_external: bool = False) -> Dict[str, Any]:
        """Query the current book with GraphRAG or the external API.

        Args:
            query: Natural-language question.
            mode: GraphRAG query mode passed to ``QueryParam``.
            use_external: Route the query to the external API; honoured only
                when ``ENABLE_EXTERNAL_API`` is set.

        Returns:
            A result dict. On success it carries ``answer`` and a
            ``searchPath`` with the parsed entities and relations; on failure
            ``success`` is False and ``error`` / ``error_details`` explain why.
            Never raises.
        """
        # Use external API if enabled and requested
        if use_external and ENABLE_EXTERNAL_API:
            return await self.query_external_api(query, mode)

        problem = self._ensure_book_loaded()
        if problem:
            return {
                "success": False,
                "error": f"GraphRAG error: {problem}",
                "error_details": problem,
                "book_id": self.current_book or "unknown",
                "mode": mode,
                "query": query
            }

        try:
            graph_instance = self.instances[self.current_book]

            # Get context with details
            context_param = QueryParam(mode=mode, only_need_context=True, top_k=20)
            context = await graph_instance.aquery(query, param=context_param)

            # Get actual answer
            answer_param = QueryParam(mode=mode, top_k=20)
            answer = await graph_instance.aquery(query, param=answer_param)

            # Parse context (handle None case)
            if context:
                entities, relations = self.parse_context_csv(context)
            else:
                entities, relations = [], []
                print("⚠️ Context is None, using empty entities/relations")

            return {
                "success": True,
                "answer": answer or "Réponse GraphRAG indisponible",
                "searchPath": {
                    "entities": [
                        {**e, "order": i+1, "score": 1.0 - (i * 0.05)}
                        for i, e in enumerate(entities[:15])
                    ],
                    "relations": [
                        {**r, "traversalOrder": i+1}
                        for i, r in enumerate(relations[:20])
                    ],
                    "communities": [
                        {"id": "community_1", "content": "Cluster thématique principal", "relevance": 0.9}
                    ]
                },
                "book_id": self.current_book,
                "mode": mode,
                "query": query
            }
        except Exception as e:
            import traceback
            error_details = traceback.format_exc()
            print(f"🚨 Full GraphRAG error: {error_details}")
            return {
                "success": False,
                "error": f"GraphRAG error: {str(e)}",
                "error_details": error_details,
                "book_id": self.current_book or "unknown",
                "mode": mode,
                "query": query
            }


# Initialize GraphRAG instance
borges_rag = BorgesGraphRAG()

# Check for available book data: any visible sub-directory of the current
# directory that contains the GraphRAG graph file. Sorted so that the default
# book does not depend on directory listing order.
available_books = []
for item in sorted(os.listdir('.')):
    if os.path.isdir(item) and not item.startswith('.'):
        graph_file = os.path.join(item, 'graph_chunk_entity_relation.graphml')
        if os.path.exists(graph_file):
            available_books.append(item)

if available_books:
    default_book = available_books[0]
    print(f"🔍 Trying to load default book: {default_book}")
    print(f"🔍 NANO_GRAPHRAG_AVAILABLE: {NANO_GRAPHRAG_AVAILABLE}")

    # Force loading with retries
    for attempt in range(3):
        try:
            if borges_rag.load_book_data(default_book):
                book_status = f"✅ Livre chargé: {default_book}"
                print(f"🎉 Successfully loaded book: {default_book} (attempt {attempt+1})")
                print(f"🎯 Current book set to: {borges_rag.current_book}")
                break
        except Exception as e:
            print(f"⚠️ Attempt {attempt+1} failed: {e}")
    else:
        book_status = f"❌ Échec du chargement après 3 tentatives: {default_book}"
        # Force set current book anyway; query_book retries the load lazily.
        borges_rag.current_book = default_book
        print(f"🔧 Force setting current book to: {default_book}")
else:
    book_status = "❌ Aucune donnée GraphRAG trouvée"


async def process_query(query: str, mode: str, use_external: bool = False) -> tuple:
    """Process a query and return formatted results.

    Args:
        query: Question typed by the user.
        mode: Search mode label from the UI; lower-cased before use.
        use_external: Whether the user asked for the external API.

    Returns:
        An ``(answer, json_result, summary)`` tuple of strings for the answer
        panel, the JSON viewer and the summary panel. Never raises.
    """
    if not query or not query.strip():
        return "❌ Veuillez entrer une question", "{}", ""

    try:
        result = await borges_rag.query_book(query, mode.lower(), use_external)

        if result.get("success"):
            # External payloads are not guaranteed to carry every key.
            answer = result.get("answer", "")

            # Format search path info
            search_info = result.get("searchPath") or {}
            entities_count = len(search_info.get("entities") or [])
            relations_count = len(search_info.get("relations") or [])

            # The external API is only used when it is also enabled, so the
            # label must follow the same condition as query_book.
            source = "API Borges" if (use_external and ENABLE_EXTERNAL_API) else "Local"

            # Create summary
            summary = "\n".join((
                "",
                "📊 **Analyse de la traversée du graphe:**",
                f"• {entities_count} entités identifiées",
                f"• {relations_count} relations explorées",
                f"• Mode: {result.get('mode', 'demo')}",
                f"• Source: {source}",
                f"• Livre: {result.get('book_id', 'demo')}",
                "",
            ))

            # JSON for API
            json_result = json.dumps(result, indent=2, ensure_ascii=False)

            return answer, json_result, summary

        error_msg = result.get("error", "Erreur inconnue")
        fallback = result.get("fallback")

        if fallback and fallback.get("success"):
            answer = f"⚠️ Mode de secours activé:\n\n{fallback['answer']}"
            json_result = json.dumps(fallback, indent=2, ensure_ascii=False)
            summary = "📊 **Mode démo activé (erreur de connexion)**"
            return answer, json_result, summary

        return f"❌ Erreur: {error_msg}", "{}", ""

    except Exception as e:
        return f"❌ Exception: {str(e)}", "{}", ""


def _run_sync(coro):
    """Run ``coro`` to completion on a fresh event loop and return its result.

    ``asyncio.run`` closes the loop and clears the thread's current loop
    afterwards, instead of leaving a closed loop installed for later callers.
    """
    return asyncio.run(coro)


# Gradio interface
def query_interface(query: str, mode: str, use_external: bool = False):
    """Sync wrapper for async query processing."""
    return _run_sync(process_query(query, mode, use_external))


# API endpoint for external calls
def api_query(query: str, mode: str = "local", use_external: bool = False):
    """API endpoint that returns the raw result dict of ``query_book``."""
    return _run_sync(borges_rag.query_book(query, mode, use_external))


def _dropdown_update(selected=None):
    """Build a dropdown update listing the known books.

    The current value is left untouched unless ``selected`` is given.
    """
    if selected is None:
        return gr.update(choices=list(available_books))
    return gr.update(choices=list(available_books), value=selected)


def _register_book(working_dir: str):
    """Record ``working_dir`` as an available book and select it in the UI.

    Books are registered under their working directory because that is the
    identifier ``load_book_data`` (and therefore ``switch_book``) expects.
    """
    if working_dir not in available_books:
        available_books.append(working_dir)
    return _dropdown_update(selected=working_dir)


def _find_graphml_dir(root_dir: str):
    """Return the directory of the first ``.graphml`` file under ``root_dir``, or None."""
    for root, _dirs, files in os.walk(root_dir):
        for file in files:
            if file.endswith('.graphml'):
                return root
    return None


def upload_and_process_book(file_obj):
    """Handle book upload and processing.

    Accepts a ``.zip`` archive of pre-built GraphRAG data or a ``.txt`` file
    that is indexed from scratch.

    Args:
        file_obj: Value delivered by the Gradio file component: either a path
            string or an object whose ``name`` attribute is the path of the
            uploaded temporary file. None when nothing was selected.

    Returns:
        A ``(status_message, dropdown_update)`` tuple. Never raises.
    """
    if file_obj is None:
        return "❌ Aucun fichier sélectionné", _dropdown_update()

    temp_dir = None
    keep_temp_dir = False  # only a successfully loaded book keeps its files
    try:
        # Create temp directory for processing
        temp_dir = tempfile.mkdtemp(prefix="borges_book_")

        # Copy by path: `.name` is the upload's own location, so joining it
        # onto temp_dir and rewriting it would clobber the source file.
        src_path = getattr(file_obj, "name", None) or str(file_obj)
        file_name = os.path.basename(src_path)
        file_path = os.path.join(temp_dir, file_name)
        shutil.copy(src_path, file_path)

        extension = Path(file_name).suffix.lower()

        if extension == '.zip':
            # Handle ZIP file with GraphRAG data.
            # NOTE(review): the archive is user-supplied; zipfile documents
            # that it neutralises absolute paths and ".." components, but no
            # size limit is enforced here.
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                zip_ref.extractall(temp_dir)

            # Use the first graphml directory as working directory
            working_dir = _find_graphml_dir(temp_dir)
            if working_dir is None:
                return "❌ Aucune donnée GraphRAG trouvée dans le fichier ZIP", _dropdown_update()

            book_id = os.path.basename(working_dir)

            # Load the book data
            if not borges_rag.load_book_data(working_dir):
                return "❌ Erreur lors du chargement des données GraphRAG", _dropdown_update()

            keep_temp_dir = True
            return f"✅ Livre '{book_id}' chargé avec succès!", _register_book(working_dir)

        if extension == '.txt':
            # Handle text file - create new GraphRAG instance
            if not NANO_GRAPHRAG_AVAILABLE:
                return "❌ nano-graphrag non disponible pour traiter les fichiers texte", _dropdown_update()

            book_id = Path(file_name).stem
            working_dir = os.path.join(temp_dir, book_id)
            os.makedirs(working_dir, exist_ok=True)

            # Create GraphRAG instance
            graph_instance = GraphRAG(
                working_dir=working_dir,
                best_model_func=gpt_4o_mini_complete,
                cheap_model_func=gpt_4o_mini_complete,
                best_model_max_async=3,
                cheap_model_max_async=3
            )

            # Read and process text
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            graph_instance.insert(content)

            # Load the processed data
            if not borges_rag.load_book_data(working_dir):
                return "❌ Erreur lors du traitement du fichier texte", _dropdown_update()

            keep_temp_dir = True
            return f"✅ Livre '{book_id}' traité et chargé avec succès!", _register_book(working_dir)

        return "❌ Format de fichier non supporté. Utilisez .txt ou .zip", _dropdown_update()

    except Exception as e:
        return f"❌ Erreur lors du traitement: {str(e)}", _dropdown_update()
    finally:
        # Do not leak the scratch directory when the upload was not loaded.
        if temp_dir and not keep_temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)


def switch_book(book_id: str):
    """Switch to a different book and return a status message for the UI."""
    if book_id and borges_rag.load_book_data(book_id):
        return f"✅ Livre '{book_id}' activé"
    return f"❌ Impossible de charger le livre '{book_id}'"


# Gradio app
with gr.Blocks(
    title="Borges Graph - GraphRAG Explorer",
    theme=gr.themes.Soft(primary_hue="amber"),
    css="""
    .gradio-container {
        font-family: 'Georgia', serif;
        background: linear-gradient(135deg, #1a1a1a 0%, #2d2d2d 100%);
        color: #d4af37;
    }
    .gr-button-primary {
        background: linear-gradient(135deg, #d4af37 0%, #b8941f 100%);
        border: none;
    }
    """
) as app:

    gr.Markdown("""
    # 📚 Borges Graph - GraphRAG Explorer

    Explorez la bibliothèque infinie avec l'intelligence artificielle.
    Posez vos questions en langage naturel et découvrez les connexions secrètes dans l'univers borgésien.
    """)

    gr.Markdown(f"**Statut:** {book_status}")

    with gr.Tab("🔍 Recherche"):
        with gr.Row():
            with gr.Column(scale=2):
                query_input = gr.Textbox(
                    label="🔍 Votre question",
                    placeholder="Quels sont les thèmes principaux de cette œuvre ?",
                    lines=2
                )

                with gr.Row():
                    mode_select = gr.Radio(
                        choices=["Local", "Global"],
                        value="Local",
                        label="Mode de recherche",
                        info="Local: recherche focalisée | Global: vue d'ensemble"
                    )

                    external_api_checkbox = gr.Checkbox(
                        label="🌐 Utiliser l'API Borges",
                        value=False,
                        visible=ENABLE_EXTERNAL_API,
                        info="Interroger directement l'API Borges en ligne"
                    )

                search_btn = gr.Button("🚀 Explorer le graphe", variant="primary")

            with gr.Column(scale=1):
                gr.Markdown("""
                ### 💡 Questions suggérées:
                - Quels sont les thèmes principaux ?
                - Parle-moi des personnages
                - Quelle est la structure narrative ?
                - Comment les concepts sont-ils liés ?
                """)

        with gr.Row():
            with gr.Column():
                answer_output = gr.Markdown(label="📖 Réponse")
                summary_output = gr.Markdown(label="📊 Résumé de l'analyse")

        with gr.Accordion("🔧 Réponse JSON (pour développeurs)", open=False):
            json_output = gr.Code(language="json", label="JSON Response")

    with gr.Tab("📚 Gestion des livres"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📥 Uploader un nouveau livre")
                file_upload = gr.File(
                    label="Sélectionner un fichier",
                    file_types=[".txt", ".zip"],
                    file_count="single"
                )
                upload_btn = gr.Button("📤 Traiter le fichier", variant="secondary")
                upload_status = gr.Markdown("ℹ️ Aucun fichier sélectionné")

            with gr.Column():
                gr.Markdown("### 🔄 Changer de livre")
                book_dropdown = gr.Dropdown(
                    choices=available_books,
                    label="Livres disponibles",
                    value=available_books[0] if available_books else None
                )
                switch_btn = gr.Button("🔄 Activer ce livre", variant="secondary")
                switch_status = gr.Markdown("")

        gr.Markdown("""
        ### 📋 Instructions:
        - **Fichiers .txt**: Uploadez un texte brut qui sera traité par GraphRAG
        - **Fichiers .zip**: Uploadez des données GraphRAG pré-traitées (dossier avec .graphml)
        - L'API Borges permet d'interroger directement votre application Vercel
        """)

    # Event handlers: the button click and the textbox submit share one
    # wiring. The checkbox is only passed along when the external API is on.
    if ENABLE_EXTERNAL_API:
        search_fn = query_interface
        search_inputs = [query_input, mode_select, external_api_checkbox]
    else:
        search_fn = lambda query, mode: query_interface(query, mode, False)
        search_inputs = [query_input, mode_select]
    search_outputs = [answer_output, json_output, summary_output]

    search_btn.click(
        fn=search_fn,
        inputs=search_inputs,
        outputs=search_outputs
    )

    query_input.submit(
        fn=search_fn,
        inputs=search_inputs,
        outputs=search_outputs
    )

    upload_btn.click(
        fn=upload_and_process_book,
        inputs=[file_upload],
        outputs=[upload_status, book_dropdown]
    )

    switch_btn.click(
        fn=switch_book,
        inputs=[book_dropdown],
        outputs=[switch_status]
    )

# Launch the app
if __name__ == "__main__":
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )