# Spaces: Sleeping
import asyncio
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
import traceback
import zipfile
from pathlib import Path
from typing import Any, Dict, List

import gradio as gr
import requests

# Install and import nano_graphrag from local source
def install_nano_graphrag():
    """Put the local ``nano-graphrag`` directory on ``sys.path``.

    Despite the name, nothing is downloaded or installed: the directory
    ``<current working directory>/nano-graphrag`` is inserted at the front of
    ``sys.path`` unless it is already listed.

    Returns:
        True once the path entry is in place, False if anything raised.
    """
    try:
        vendored_path = os.path.join(os.getcwd(), "nano-graphrag")
        already_listed = vendored_path in sys.path
        if not already_listed:
            sys.path.insert(0, vendored_path)
        print("✅ nano-graphrag added to Python path")
    except Exception as exc:
        print(f"⚠️ Failed to add nano-graphrag to path: {exc}")
        return False
    return True
# Import nano_graphrag. If the first attempt fails, put the local copy on
# sys.path and try exactly once more. NANO_GRAPHRAG_AVAILABLE records the
# outcome so the rest of the module can fall back to demo mode.
try:
    from nano_graphrag import GraphRAG, QueryParam
    from nano_graphrag._llm import gpt_4o_mini_complete
except ImportError as first_error:
    print(f"⚠️ nano-graphrag not available, trying to install: {first_error}")
    NANO_GRAPHRAG_AVAILABLE = False
    if not install_nano_graphrag():
        print("⚠️ nano-graphrag installation failed, running in demo mode")
    else:
        try:
            from nano_graphrag import GraphRAG, QueryParam
            from nano_graphrag._llm import gpt_4o_mini_complete
        except ImportError as second_error:
            print(f"⚠️ Still failed to import after installation: {second_error}")
        else:
            NANO_GRAPHRAG_AVAILABLE = True
            print("✅ nano-graphrag installed and imported successfully")
else:
    NANO_GRAPHRAG_AVAILABLE = True
    print("✅ nano-graphrag imported successfully")
# External API configuration, both values overridable through the environment.
# The external API is only enabled when ENABLE_EXTERNAL_API is the string
# "true" (case-insensitive).
BORGES_API_URL = os.environ.get("BORGES_API_URL", "https://borges-library.vercel.app/api/graphrag")
ENABLE_EXTERNAL_API = os.environ.get("ENABLE_EXTERNAL_API", "false").lower() == "true"
class BorgesGraphRAG:
    """Registry of GraphRAG instances, one per book working directory.

    ``instances`` maps a working-directory string to the GraphRAG object built
    for it. ``current_book`` is the key that queries run against; it is
    ``None`` until a book has been loaded.
    """

    def __init__(self):
        self.instances = {}
        self.current_book = None

    def load_book_data(self, book_folder: str):
        """Make ``book_folder`` the current book, building its GraphRAG on first use.

        Args:
            book_folder: Working directory handed to ``GraphRAG``; also the key
                under which the instance is cached.

        Returns:
            True on success. False when nano-graphrag could not be imported or
            when building the instance raised (the error is printed).
        """
        if not NANO_GRAPHRAG_AVAILABLE:
            return False
        try:
            if book_folder not in self.instances:
                self.instances[book_folder] = GraphRAG(
                    working_dir=book_folder,
                    best_model_func=gpt_4o_mini_complete,
                    cheap_model_func=gpt_4o_mini_complete,
                    best_model_max_async=3,
                    cheap_model_max_async=3
                )
            self.current_book = book_folder
            return True
        except Exception as e:
            print(f"Error loading book data: {e}")
            return False

    @staticmethod
    def _to_float(value, default: float = 1.0) -> float:
        """Convert ``value`` to float, returning ``default`` when it is missing or not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _parse_csv_section(context_str: str, section: str) -> List[tuple]:
        """Extract the data rows of one ``-----<section>-----`` fenced CSV block.

        Each row is ``(second_field, third_field, description, extras)``.
        ``extras`` maps the lower-cased names of any header columns declared
        after the fourth one to the row's values for them.
        """
        match = re.search(
            rf'-----{re.escape(section)}-----\n```csv\n(.*?)\n```',
            context_str,
            re.DOTALL,
        )
        if not match:
            return []

        lines = match.group(1).strip().split('\n')
        # Header columns beyond id/name/name/description are peeled off the
        # right-hand side of each row, so commas inside the free-text
        # description do not shift them.
        trailing_cols = [name.strip().lower() for name in lines[0].split(',')][4:]

        rows = []
        for line in lines[1:]:  # lines[0] is the header
            if not line.strip():
                continue
            head = line.split(',', 3)
            if len(head) < 4:
                continue
            tail = head[3].rsplit(',', len(trailing_cols))
            if len(tail) != len(trailing_cols) + 1:
                # Fewer fields than the header announces: treat the whole
                # remainder as the description.
                tail = [head[3]]
            extras = dict(zip(trailing_cols, (value.strip() for value in tail[1:])))
            rows.append((head[1].strip(), head[2].strip(), tail[0].strip(), extras))
        return rows

    def parse_context_csv(self, context_str: str):
        """Parse the CSV context returned by GraphRAG.

        Reads the ``-----Entities-----`` and ``-----Relationships-----``
        sections, each a fenced ``csv`` block whose first line is a header and
        whose first four columns are ``id, name, type/target, description``.
        The description keeps its inner commas and spacing. ``rank`` and
        ``weight`` are taken from same-named header columns when present and
        numeric, otherwise they default to 1.0.

        Returns:
            ``(entities, relations)``: two lists of dicts with keys
            ``id/type/description/rank`` and
            ``source/target/description/weight/rank`` respectively.
        """
        entities = [
            {
                'id': name,
                'type': entity_type,
                'description': description,
                'rank': self._to_float(extras.get('rank')),
            }
            for name, entity_type, description, extras
            in self._parse_csv_section(context_str, 'Entities')
        ]
        relations = [
            {
                'source': source,
                'target': target,
                'description': description,
                'weight': self._to_float(extras.get('weight')),
                'rank': self._to_float(extras.get('rank')),
            }
            for source, target, description, extras
            in self._parse_csv_section(context_str, 'Relationships')
        ]
        return entities, relations

    async def query_external_api(self, query: str, mode: str = "local") -> Dict[str, Any]:
        """Query external Borges API.

        POSTs ``{"query", "mode"}`` to ``BORGES_API_URL + "/search"`` with a
        30 second timeout.

        Returns:
            The decoded JSON body on HTTP 200. Otherwise a dict with
            ``success=False`` and an ``error`` string; this method does not
            raise for request or decoding failures.
        """
        payload = {
            "query": query,
            "mode": mode
        }

        def _post():
            return requests.post(
                f"{BORGES_API_URL}/search",
                json=payload,
                timeout=30
            )

        try:
            # requests is blocking; run it in the default executor so the event
            # loop is not stalled for the duration of the HTTP call.
            response = await asyncio.get_running_loop().run_in_executor(None, _post)
            if response.status_code == 200:
                return response.json()
            return {
                "success": False,
                "error": f"API error: {response.status_code}",
                "query": query,
                "mode": mode
            }
        except Exception as e:
            return {
                "success": False,
                "error": f"Connection error: {str(e)}",
                "query": query,
                "mode": mode
            }

    async def query_book(self, query: str, mode: str = "local", use_external: bool = False) -> Dict[str, Any]:
        """Query the current book with GraphRAG or external API.

        The external API is used only when ``use_external`` is true and
        ``ENABLE_EXTERNAL_API`` is set. Otherwise the current book's GraphRAG
        instance is queried twice: once for the retrieval context and once for
        the answer.

        Returns:
            On success a dict with ``success=True``, ``answer``, ``searchPath``
            (up to 15 entities and 20 relations parsed from the context, plus a
            fixed placeholder community), ``book_id``, ``mode`` and ``query``.
            On failure a dict with ``success=False``, ``error`` and
            ``error_details`` (the formatted traceback).
        """
        # Use external API if enabled and requested
        if use_external and ENABLE_EXTERNAL_API:
            return await self.query_external_api(query, mode)

        # Make sure some book is selected before querying.
        if not self.current_book and available_books:
            print(f"🔄 No current book, loading first available: {available_books[0]}")
            self.load_book_data(available_books[0])

        try:
            graph_instance = self.instances.get(self.current_book)
            if graph_instance is None and self.current_book:
                # current_book may be set without a cached instance (the
                # start-up code assigns it even when loading failed), so give
                # loading one more chance before giving up.
                if self.load_book_data(self.current_book):
                    graph_instance = self.instances.get(self.current_book)
            if graph_instance is None:
                # Raised here so the caller gets a readable message instead of
                # a bare KeyError on the instances lookup.
                raise RuntimeError(
                    f"no GraphRAG instance is loaded for book {self.current_book!r}"
                )

            # Get context with details
            context_param = QueryParam(mode=mode, only_need_context=True, top_k=20)
            context = await graph_instance.aquery(query, param=context_param)

            # Get actual answer
            answer_param = QueryParam(mode=mode, top_k=20)
            answer = await graph_instance.aquery(query, param=answer_param)

            # Parse context (handle None case)
            if context:
                entities, relations = self.parse_context_csv(context)
            else:
                entities, relations = [], []
                print("⚠️ Context is None, using empty entities/relations")

            return {
                "success": True,
                "answer": answer or "Réponse GraphRAG indisponible",
                "searchPath": {
                    # score falls by 0.05 per position in the list
                    "entities": [
                        {**e, "order": i+1, "score": 1.0 - (i * 0.05)}
                        for i, e in enumerate(entities[:15])
                    ],
                    "relations": [
                        {**r, "traversalOrder": i+1}
                        for i, r in enumerate(relations[:20])
                    ],
                    "communities": [
                        {"id": "community_1", "content": "Cluster thématique principal", "relevance": 0.9}
                    ]
                },
                "book_id": self.current_book,
                "mode": mode,
                "query": query
            }
        except Exception as e:
            error_details = traceback.format_exc()
            print(f"🚨 Full GraphRAG error: {error_details}")
            return {
                "success": False,
                "error": f"GraphRAG error: {str(e)}",
                "error_details": error_details,
                "book_id": self.current_book or "unknown",
                "mode": mode,
                "query": query
            }
# Initialize GraphRAG instance
borges_rag = BorgesGraphRAG()

# A sub-directory of the working directory counts as a book when it contains
# graph_chunk_entity_relation.graphml. The listing is sorted because
# os.listdir returns entries in arbitrary order, which would otherwise make the
# default book differ from one start to the next.
available_books = [
    item
    for item in sorted(os.listdir('.'))
    if os.path.isdir(item)
    and not item.startswith('.')
    and os.path.exists(os.path.join(item, 'graph_chunk_entity_relation.graphml'))
]

# book_status is the status line shown at the top of the UI.
if available_books:
    default_book = available_books[0]
    print(f"🔍 Trying to load default book: {default_book}")
    print(f"🔍 NANO_GRAPHRAG_AVAILABLE: {NANO_GRAPHRAG_AVAILABLE}")
    # Force loading with retries
    for attempt in range(3):
        try:
            if borges_rag.load_book_data(default_book):
                book_status = f"✅ Livre chargé: {default_book}"
                print(f"🎉 Successfully loaded book: {default_book} (attempt {attempt+1})")
                print(f"🎯 Current book set to: {borges_rag.current_book}")
                break
        except Exception as e:
            print(f"⚠️ Attempt {attempt+1} failed: {e}")
    else:
        # Reached only when no attempt hit `break`, i.e. all three failed.
        book_status = f"❌ Échec du chargement après 3 tentatives: {default_book}"
        # Force set current book anyway
        borges_rag.current_book = default_book
        print(f"🔧 Force setting current book to: {default_book}")
else:
    book_status = "❌ Aucune donnée GraphRAG trouvée"
async def process_query(query: str, mode: str, use_external: bool = False) -> tuple:
    """Process a query and return formatted results.

    Args:
        query: The user's question; ``None``, empty or whitespace-only input is
            rejected without running a query.
        mode: Search mode label from the UI; lower-cased before use.
        use_external: Whether the caller asked for the external API.

    Returns:
        A ``(answer, json_result, summary)`` tuple of strings. On any failure
        the first element carries the error message and the other two are
        ``"{}"`` and ``""``; the function does not raise.
    """
    if not query or not query.strip():
        return "❌ Veuillez entrer une question", "{}", ""

    try:
        result = await borges_rag.query_book(query, mode.lower(), use_external)

        if result.get("success"):
            # External responses are not guaranteed to carry every key the
            # local path produces, so read them defensively.
            answer = result.get("answer") or "Réponse GraphRAG indisponible"
            search_info = result.get("searchPath") or {}
            entities_count = len(search_info.get("entities") or [])
            relations_count = len(search_info.get("relations") or [])

            # The external API is only consulted when it is both requested and
            # enabled; label the source accordingly.
            source = "API Borges" if use_external and ENABLE_EXTERNAL_API else "Local"

            summary = "\n".join([
                "",
                "📊 **Analyse de la traversée du graphe:**",
                f"• {entities_count} entités identifiées",
                f"• {relations_count} relations explorées",
                f"• Mode: {result.get('mode', 'demo')}",
                f"• Source: {source}",
                f"• Livre: {result.get('book_id', 'demo')}",
                "",
            ])

            # JSON for API
            json_result = json.dumps(result, indent=2, ensure_ascii=False)
            return answer, json_result, summary

        error_msg = result.get("error", "Erreur inconnue")
        fallback = result.get("fallback")
        if fallback and fallback.get("success"):
            # A failed response may embed a usable fallback payload.
            answer = f"⚠️ Mode de secours activé:\n\n{fallback['answer']}"
            json_result = json.dumps(fallback, indent=2, ensure_ascii=False)
            summary = "📊 **Mode démo activé (erreur de connexion)**"
            return answer, json_result, summary
        return f"❌ Erreur: {error_msg}", "{}", ""
    except Exception as e:
        # UI boundary: report the problem instead of crashing the handler.
        return f"❌ Exception: {str(e)}", "{}", ""
# Gradio interface
def query_interface(query: str, mode: str, use_external: bool = False):
    """Sync wrapper for async query processing.

    Runs ``process_query`` to completion on a fresh event loop and returns its
    ``(answer, json_result, summary)`` tuple.

    ``asyncio.run`` is used instead of creating, registering and closing a loop
    by hand. The manual sequence left the closed loop registered as the
    thread's current event loop, so later code on the same thread that calls
    ``asyncio.get_event_loop()`` would receive a closed loop. ``asyncio.run``
    also cancels leftover tasks and finalises async generators before closing.
    """
    return asyncio.run(process_query(query, mode, use_external))
# API endpoint for external calls
def api_query(query: str, mode: str = "local", use_external: bool = False):
    """API endpoint that returns the raw response dict of ``borges_rag.query_book``.

    The mode is lower-cased (``None`` or empty falls back to ``"local"``) so
    this entry point accepts the same "Local"/"Global" spellings as the UI
    path, which lower-cases before querying.

    ``asyncio.run`` replaces the hand-managed loop so that no closed loop stays
    registered as the thread's current event loop after the call.
    """
    normalized_mode = (mode or "local").lower()
    return asyncio.run(borges_rag.query_book(query, normalized_mode, use_external))
def _register_uploaded_book(book_id, working_dir):
    """Expose a freshly loaded upload under its short ``book_id``."""
    # load_book_data caches the instance under its working directory. The
    # dropdown only carries book_id, so alias the same instance under that key;
    # a later load_book_data(book_id) then finds it instead of building a new
    # instance on a directory named book_id. An existing entry with the same
    # id is replaced by the upload.
    borges_rag.instances[book_id] = borges_rag.instances[working_dir]
    borges_rag.current_book = book_id
    if book_id not in available_books:
        available_books.append(book_id)


def upload_and_process_book(file_obj):
    """Handle book upload and processing.

    Accepts a ``.zip`` containing pre-built GraphRAG data (any directory with a
    ``.graphml`` file) or a ``.txt`` file that is indexed from scratch. The
    extension check is case-insensitive.

    Args:
        file_obj: The value delivered by the file component. It is handled as
            either a filesystem path or an object whose ``.name`` is the path
            of the uploaded file (which of the two arrives depends on the
            Gradio version/configuration — TODO confirm for the deployed one).

    Returns:
        ``(status_message, dropdown_update)``. The second element is a
        ``gr.update`` that refreshes the dropdown's choices and, on success,
        selects the new book; returning a plain list would be applied as the
        dropdown's value instead of its choices.
    """
    if file_obj is None:
        return "❌ Aucun fichier sélectionné", gr.update()

    temp_dir = None
    keep_temp_dir = False  # only a successfully loaded book keeps its directory
    try:
        if isinstance(file_obj, (str, os.PathLike)):
            source_path = os.fspath(file_obj)
        else:
            source_path = file_obj.name
        file_name = os.path.basename(source_path)

        # Copy the upload into a private working area. Joining temp_dir with an
        # absolute source path would resolve to the source itself, and opening
        # that for writing would truncate the upload before it was read.
        temp_dir = tempfile.mkdtemp(prefix="borges_book_")
        file_path = os.path.join(temp_dir, file_name)
        shutil.copyfile(source_path, file_path)
        extension = Path(file_name).suffix.lower()

        if extension == '.zip':
            # NOTE(review): the archive is user-supplied; nothing here limits
            # its extracted size.
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                zip_ref.extractall(temp_dir)

            graphml_files = [
                os.path.join(root, name)
                for root, _dirs, files in os.walk(temp_dir)
                for name in files
                if name.endswith('.graphml')
            ]
            if not graphml_files:
                return (
                    "❌ Aucune donnée GraphRAG trouvée dans le fichier ZIP",
                    gr.update(choices=list(available_books)),
                )

            # Use first graphml directory as working directory
            working_dir = os.path.dirname(graphml_files[0])
            book_id = os.path.basename(working_dir)
            if not borges_rag.load_book_data(working_dir):
                return (
                    "❌ Erreur lors du chargement des données GraphRAG",
                    gr.update(choices=list(available_books)),
                )

            _register_uploaded_book(book_id, working_dir)
            keep_temp_dir = True
            return (
                f"✅ Livre '{book_id}' chargé avec succès!",
                gr.update(choices=list(available_books), value=book_id),
            )

        if extension == '.txt':
            if not NANO_GRAPHRAG_AVAILABLE:
                return (
                    "❌ nano-graphrag non disponible pour traiter les fichiers texte",
                    gr.update(choices=list(available_books)),
                )

            book_id = Path(file_name).stem
            working_dir = os.path.join(temp_dir, book_id)
            os.makedirs(working_dir, exist_ok=True)

            # Build the index for the raw text inside working_dir.
            graph_instance = GraphRAG(
                working_dir=working_dir,
                best_model_func=gpt_4o_mini_complete,
                cheap_model_func=gpt_4o_mini_complete,
                best_model_max_async=3,
                cheap_model_max_async=3
            )
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            graph_instance.insert(content)

            # Load the processed data
            if not borges_rag.load_book_data(working_dir):
                return (
                    "❌ Erreur lors du traitement du fichier texte",
                    gr.update(choices=list(available_books)),
                )

            _register_uploaded_book(book_id, working_dir)
            keep_temp_dir = True
            return (
                f"✅ Livre '{book_id}' traité et chargé avec succès!",
                gr.update(choices=list(available_books), value=book_id),
            )

        return (
            "❌ Format de fichier non supporté. Utilisez .txt ou .zip",
            gr.update(choices=list(available_books)),
        )
    except Exception as e:
        # UI boundary: surface the failure as a status message.
        return f"❌ Erreur lors du traitement: {str(e)}", gr.update(choices=list(available_books))
    finally:
        # Failed or rejected uploads must not leave their scratch directory behind.
        if temp_dir is not None and not keep_temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
def switch_book(book_id: str):
    """Activate ``book_id`` and report the outcome as a status line.

    An empty or missing ``book_id`` is reported as a failure without
    attempting to load anything.
    """
    activated = bool(book_id) and borges_rag.load_book_data(book_id)
    if not activated:
        return f"❌ Impossible de charger le livre '{book_id}'"
    return f"✅ Livre '{book_id}' activé"
# Gradio app
# Static text for the interface, kept apart from the layout code below.
_APP_CSS = """
.gradio-container {
font-family: 'Georgia', serif;
background: linear-gradient(135deg, #1a1a1a 0%, #2d2d2d 100%);
color: #d4af37;
}
.gr-button-primary {
background: linear-gradient(135deg, #d4af37 0%, #b8941f 100%);
border: none;
}
"""

_INTRO_MD = """
# 📚 Borges Graph - GraphRAG Explorer
Explorez la bibliothèque infinie avec l'intelligence artificielle. Posez vos questions en langage naturel et découvrez les connexions secrètes dans l'univers borgésien.
"""

_SUGGESTIONS_MD = """
### 💡 Questions suggérées:
- Quels sont les thèmes principaux ?
- Parle-moi des personnages
- Quelle est la structure narrative ?
- Comment les concepts sont-ils liés ?
"""

_INSTRUCTIONS_MD = """
### 📋 Instructions:
- **Fichiers .txt**: Uploadez un texte brut qui sera traité par GraphRAG
- **Fichiers .zip**: Uploadez des données GraphRAG pré-traitées (dossier avec .graphml)
- L'API Borges permet d'interroger directement votre application Vercel
"""

with gr.Blocks(
    title="Borges Graph - GraphRAG Explorer",
    theme=gr.themes.Soft(primary_hue="amber"),
    css=_APP_CSS
) as app:
    gr.Markdown(_INTRO_MD)
    gr.Markdown(f"**Statut:** {book_status}")

    # Search tab: question and options on the left, suggestions on the right,
    # results underneath.
    with gr.Tab("🔍 Recherche"):
        with gr.Row():
            with gr.Column(scale=2):
                query_input = gr.Textbox(
                    label="🔍 Votre question",
                    placeholder="Quels sont les thèmes principaux de cette œuvre ?",
                    lines=2
                )
                with gr.Row():
                    mode_select = gr.Radio(
                        choices=["Local", "Global"],
                        value="Local",
                        label="Mode de recherche",
                        info="Local: recherche focalisée | Global: vue d'ensemble"
                    )
                    # Shown only when the external API is enabled.
                    external_api_checkbox = gr.Checkbox(
                        label="🌐 Utiliser l'API Borges",
                        value=False,
                        visible=ENABLE_EXTERNAL_API,
                        info="Interroger directement l'API Borges en ligne"
                    )
                search_btn = gr.Button("🚀 Explorer le graphe", variant="primary")
            with gr.Column(scale=1):
                gr.Markdown(_SUGGESTIONS_MD)
        with gr.Row():
            with gr.Column():
                answer_output = gr.Markdown(label="📖 Réponse")
                summary_output = gr.Markdown(label="📊 Résumé de l'analyse")
        with gr.Accordion("🔧 Réponse JSON (pour développeurs)", open=False):
            json_output = gr.Code(language="json", label="JSON Response")

    # Book management tab: upload on the left, book switcher on the right.
    with gr.Tab("📚 Gestion des livres"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📥 Uploader un nouveau livre")
                file_upload = gr.File(
                    label="Sélectionner un fichier",
                    file_types=[".txt", ".zip"],
                    file_count="single"
                )
                upload_btn = gr.Button("📤 Traiter le fichier", variant="secondary")
                upload_status = gr.Markdown("ℹ️ Aucun fichier sélectionné")
            with gr.Column():
                gr.Markdown("### 🔄 Changer de livre")
                book_dropdown = gr.Dropdown(
                    choices=available_books,
                    label="Livres disponibles",
                    value=available_books[0] if available_books else None
                )
                switch_btn = gr.Button("🔄 Activer ce livre", variant="secondary")
                switch_status = gr.Markdown("")
        gr.Markdown(_INSTRUCTIONS_MD)

    # Event handlers
    # The button and the textbox's Enter key trigger the same query. With the
    # external API disabled the checkbox is left out of the inputs and
    # use_external is pinned to False.
    _query_outputs = [answer_output, json_output, summary_output]
    if ENABLE_EXTERNAL_API:
        _query_fn = query_interface
        _query_inputs = [query_input, mode_select, external_api_checkbox]
    else:
        # Kept as a lambda, as in the original wiring, so the handler's
        # __name__ is unchanged.
        _query_fn = lambda query, mode: query_interface(query, mode, False)
        _query_inputs = [query_input, mode_select]

    search_btn.click(fn=_query_fn, inputs=_query_inputs, outputs=_query_outputs)
    query_input.submit(fn=_query_fn, inputs=_query_inputs, outputs=_query_outputs)

    upload_btn.click(
        fn=upload_and_process_book,
        inputs=[file_upload],
        outputs=[upload_status, book_dropdown]
    )
    switch_btn.click(
        fn=switch_book,
        inputs=[book_dropdown],
        outputs=[switch_status]
    )
# Launch the app
if __name__ == "__main__":
    # Listen on all interfaces, port 7860, without a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    app.launch(**launch_options)