| import re
|
| import gradio as gr
|
| import pdfplumber
|
| import docx
|
| import tempfile
|
| from pptx import Presentation
|
| import traceback
|
| import os
|
|
|
|
|
|
|
|
|
# Shared secret that callers must supply to generate a presentation.
# It can be overridden through the SMARTSLIDE_ACCESS_KEY environment variable
# so that deployments do not have to rely on a credential committed to source
# control. The literal below is kept only as a backward-compatible fallback
# (an empty environment value also falls back to it).
# NOTE(review): rotate this key and drop the in-source default once every
# deployment sets the environment variable.
ACCESS_KEY = os.environ.get("SMARTSLIDE_ACCESS_KEY") or "32015labmath@1a"
|
|
|
|
|
|
|
|
|
|
|
def split_sentences(text):
    """Split *text* into a list of sentence strings.

    Whitespace runs are collapsed to single spaces first. The primary split
    happens at whitespace that follows '.', '!' or '?' and precedes a digit or
    an uppercase letter. Accented Latin capitals such as 'É', 'À' or 'Œ' count
    as uppercase, so French sentences starting with them are recognised.

    When the primary split yields two pieces or fewer, the text is split again
    on every run of terminal punctuation instead; the punctuation itself is
    dropped in that case. Fragments of 15 characters or fewer are discarded.

    Args:
        text: Raw text. Anything that is not a non-empty ``str`` yields ``[]``.

    Returns:
        A list of sentences. If every fragment was filtered out, a
        one-element list holding the whitespace-normalised text. ``[]`` for
        empty or whitespace-only input.
    """
    if not text or not isinstance(text, str):
        return []

    normalized = re.sub(r'\s+', ' ', text.strip())
    if not normalized:
        return []

    # À-Ö and Ø-Þ are the Latin-1 uppercase letters (× at U+00D7 is skipped).
    sentences = re.split(r'(?<=[.!?])\s+(?=[A-ZÀ-ÖØ-ÞŒ0-9])', normalized)

    # Fallback for text with few recognisable sentence starts.
    if len(sentences) <= 2:
        sentences = re.split(r'[.!?]+', normalized)

    kept = [s.strip() for s in sentences if s and len(s.strip()) > 15]
    return kept if kept else [normalized]
|
|
|
|
|
|
|
|
|
|
|
class DocumentAnalyzer:
    """Keyword and regex heuristics for classifying a document and cutting it into sections."""

    def detect_document_type(self, text):
        """Guess the document category from keyword occurrences.

        Only the first 5000 characters of the lower-cased text are examined.
        Each category's score is the total number of substring occurrences of
        its keywords. The highest score wins, and on a tie the category listed
        first wins.

        Args:
            text: Document text.

        Returns:
            One of "Article Scientifique", "Rapport de Réunion", "Cours",
            "Rapport Technique", or "Document Général" when the input is
            empty, not a string, or matches no keyword.
        """
        if not (text and isinstance(text, str)):
            return "Document Général"

        sample = text.lower()[:5000]

        # Order matters: earlier categories win ties.
        keyword_table = (
            ("Article Scientifique", (
                "abstract", "résumé", "méthodologie", "methodology",
                "discussion", "résultats", "results", "bibliographie",
                "references", "introduction", "conclusion", "étude", "study",
            )),
            ("Rapport de Réunion", (
                "ordre du jour", "participants", "présents", "compte rendu",
                "décisions", "actions", "prochaine réunion", "points discutés",
                "réunion", "meeting", "discuté", "discussed",
            )),
            ("Cours", (
                "chapitre", "définition", "definition", "objectifs",
                "exemple", "example", "exercice", "exercise", "cours",
                "leçon", "lesson", "apprentissage", "learning",
            )),
            ("Rapport Technique", (
                "objectif", "objective", "analyse", "spécifications",
                "résultat", "result", "recommandation", "recommendation",
                "technique", "technical", "implémentation", "implementation",
            )),
        )

        best_label = "Document Général"
        best_score = 0
        for label, keywords in keyword_table:
            score = 0
            for keyword in keywords:
                score += sample.count(keyword)
            # Strict comparison keeps the first category on equal scores.
            if score > best_score:
                best_label, best_score = label, score

        return best_label

    def detect_sections(self, text):
        """Split the document into sections of more than 100 characters.

        Heading patterns are tried in order: upper-case lines, numbered
        headings, "Title:" lines, Markdown '#' headings and roman-numeral
        headings. The first pattern that cuts the text into more than two
        pieces is used. When none does, the text is cut into chunks of 500
        words.

        Args:
            text: Document text.

        Returns:
            A list of stripped sections longer than 100 characters. If none
            qualifies, a one-element list with the first 1000 characters of
            the text. For falsy input ``["Contenu non disponible"]``; for a
            truthy non-string the value itself wrapped in a list.
        """
        if not (text and isinstance(text, str)):
            return [text] if text else ["Contenu non disponible"]

        heading_patterns = (
            r'\n\s*[A-Z][A-Z\sÉÈÊÀÂÔÙÛÎÏÇ]{4,}\s*\n',
            r'\n\s*\d+\.\s+[A-Z][^.]+?\n',
            r'\n\s*[A-Z][a-zéèêàôûïüç]+(?:\s+[a-zéèêàôûïüç]+)*:\s*\n',
            r'\n\s*#{1,3}\s+.+\n',
            r'\n\s*[IVX]+\.\s+[A-Z][^.\n]+\n',
        )

        # Surround the text with newlines so headings on the first or last
        # line can match patterns anchored on '\n'.
        padded = f"\n{text}\n"

        for heading in heading_patterns:
            pieces = re.split(heading, padded)
            if len(pieces) > 2:
                break
        else:
            # No heading pattern produced a usable split: fixed-size chunks.
            words = text.split()
            pieces = [
                ' '.join(words[start:start + 500])
                for start in range(0, len(words), 500)
            ]

        kept = []
        for piece in pieces:
            if not piece:
                continue
            trimmed = piece.strip()
            if len(trimmed) > 100:
                kept.append(trimmed)

        return kept or [text[:1000]]
|
|
|
|
|
|
|
|
|
|
|
class ContentCompressor:
    """Extractive summariser that keeps the highest-scoring sentences of a text."""

    # Substrings (matched against the lower-cased sentence) that raise a
    # sentence's score by 0.2 each.
    _KEYWORDS = (
        "important", "conclusion", "résultat", "résumé",
        "significatif", "principal", "essentiel",
    )

    def summarize(self, text, max_sentences=3):
        """Return up to *max_sentences* sentences from *text*, in document order.

        Sentences come from ``split_sentences``. Each one is scored as
        ``position * length * keyword`` and the best-scoring ones are kept
        (ties go to the earlier sentence).

        The kept sentences are put back in document order by sentence index.
        Searching for them in the raw text would fail for sentences whose
        whitespace was normalised during splitting, and for duplicates.

        Args:
            text: Text to summarise.
            max_sentences: Maximum number of sentences to return.

        Returns:
            A list of sentences. ``["Aucun contenu disponible"]`` for empty or
            non-string input. All sentences when there are no more than
            *max_sentences* of them.
        """
        if not text or not isinstance(text, str):
            return ["Aucun contenu disponible"]

        sentences = split_sentences(text)
        total = len(sentences)
        if total <= max_sentences:
            return sentences

        scores = [
            self._score(index, sentence, total)
            for index, sentence in enumerate(sentences)
        ]

        # sorted() is stable, also with reverse=True, so equal scores keep
        # their original relative order.
        ranked = sorted(range(total), key=lambda index: scores[index], reverse=True)
        chosen = sorted(ranked[:max_sentences])
        return [sentences[index] for index in chosen]

    @classmethod
    def _score(cls, index, sentence, total):
        """Compute the heuristic score of one sentence."""
        # The first 20% of sentences get the largest boost, the last 20% a smaller one.
        if index < total * 0.2:
            position_score = 1.5
        elif index > total * 0.8:
            position_score = 1.3
        else:
            position_score = 1.0

        # Medium-length sentences are preferred; very short ones are penalised.
        length = len(sentence)
        if 50 <= length <= 200:
            length_score = 1.5
        elif length < 30:
            length_score = 0.5
        else:
            length_score = 1.0

        lowered = sentence.lower()
        keyword_score = 1.0 + 0.2 * sum(1 for kw in cls._KEYWORDS if kw in lowered)

        return position_score * length_score * keyword_score
|
|
|
|
|
|
|
|
|
|
|
class AdaptivePlanner:
    """Maps detected sections onto a fixed slide outline chosen by document type."""

    def __init__(self):
        self.compressor = ContentCompressor()

    def build_plan(self, doc_type, sections):
        """Build the slide list for a document.

        The outline (slide titles with default descriptions) is selected by
        *doc_type*; unknown types use the "Document Général" outline. Slide
        ``i`` is filled with a two-sentence summary of ``sections[i]`` when
        that section exists and is non-empty. Otherwise the first section
        longer than 100 characters is summarised instead. If there is no such
        section, the slide shows its default description.

        Args:
            doc_type: Category name as returned by the document analyser.
            sections: Sequence of section texts.

        Returns:
            A list of dicts with keys "title" (str) and "content" (list of
            bullet strings), one per outline entry.
        """
        outlines = {
            "Article Scientifique": [
                ("📋 Introduction", "Présentation du sujet et contexte"),
                ("🎯 Problématique", "Questions de recherche"),
                ("🔬 Méthodologie", "Approche et méthodes utilisées"),
                ("📊 Résultats", "Principaux résultats obtenus"),
                ("💭 Discussion", "Analyse et interprétation"),
                ("✅ Conclusion", "Synthèse et perspectives"),
            ],
            "Rapport de Réunion": [
                ("📅 Contexte", "Objectifs et cadre de la réunion"),
                ("👥 Participants", "Personnes présentes et absentes"),
                ("💬 Points discutés", "Sujets abordés"),
                ("⚡ Décisions", "Décisions prises"),
                ("📋 Actions", "Actions à réaliser"),
                ("📆 Prochaines étapes", "Planning et échéances"),
            ],
            "Cours": [
                ("🎯 Objectifs", "Ce que vous allez apprendre"),
                ("📚 Concepts clés", "Définitions et notions importantes"),
                ("🔍 Explications", "Détails et approfondissements"),
                ("💡 Exemples", "Cas pratiques et illustrations"),
                ("✍️ Exercices", "Mise en pratique"),
                ("📝 Résumé", "Points essentiels à retenir"),
            ],
            "Rapport Technique": [
                ("📌 Introduction", "Contexte et objectifs"),
                ("⚙️ Méthodologie", "Approche technique"),
                ("📈 Résultats", "Données et observations"),
                ("🔧 Analyse", "Interprétation technique"),
                ("💡 Recommandations", "Suggestions et améliorations"),
                ("✅ Conclusion", "Synthèse finale"),
            ],
            "Document Général": [
                ("📌 Introduction", "Présentation générale"),
                ("📋 Points principaux", "Idées clés"),
                ("📝 Développement", "Contenu détaillé"),
                ("💡 Synthèse", "Points importants"),
                ("✅ Conclusion", "Récapitulatif"),
            ],
        }

        outline = outlines.get(doc_type)
        if outline is None:
            outline = outlines["Document Général"]

        plan = []
        for position, (heading, fallback_text) in enumerate(outline):
            if position < len(sections) and sections[position]:
                source = sections[position]
            else:
                # No dedicated section for this slide: reuse the first
                # sufficiently long section, if there is one.
                source = next((s for s in sections if len(s) > 100), None)

            if source is None:
                bullets = [fallback_text]
            else:
                bullets = self.compressor.summarize(source, max_sentences=2)

            plan.append({"title": heading, "content": bullets})

        return plan
|
|
|
|
|
|
|
|
|
|
|
class SlideDesigner:
    """Renders a slide plan into a .pptx file with python-pptx."""

    def generate_presentation(self, title, slides):
        """Create a PowerPoint file and return its path.

        A cover slide carrying *title* is followed by one content slide per
        entry of *slides*. Bullets longer than 150 characters are truncated
        and suffixed with "...". Empty or whitespace-only bullets are skipped.

        Args:
            title: Text for the cover slide title.
            slides: Iterable of dicts with keys "title" (str) and "content"
                (iterable of bullet strings).

        Returns:
            Path of the generated .pptx. The file is created with
            ``delete=False``, so it is not removed automatically.

        Raises:
            Exception: Any error raised while building or saving the
                presentation is printed and re-raised unchanged.
        """
        try:
            prs = Presentation()

            # Cover slide: layout index 0 of the template.
            cover = prs.slides.add_slide(prs.slide_layouts[0])
            cover.shapes.title.text = title
            if cover.placeholders[1].has_text_frame:
                cover.placeholders[1].text = "Généré automatiquement"

            # Content slides: layout index 1 of the template.
            content_layout = prs.slide_layouts[1]

            for slide_data in slides:
                slide = prs.slides.add_slide(content_layout)
                slide.shapes.title.text = slide_data["title"]

                body = slide.placeholders[1]
                if not body.has_text_frame:
                    continue

                tf = body.text_frame
                tf.clear()

                bullets = [b for b in slide_data["content"] if b and b.strip()]
                for index, bullet in enumerate(bullets):
                    # clear() leaves a single empty paragraph behind. Reuse it
                    # for the first bullet, otherwise every slide would start
                    # with a blank bullet.
                    p = tf.paragraphs[0] if index == 0 else tf.add_paragraph()
                    p.text = bullet[:150] + ("..." if len(bullet) > 150 else "")
                    p.level = 0

            # Reserve a unique path, then save once the handle is closed so
            # the write does not depend on reopening a file that is still open.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pptx') as tmp:
                ppt_path = tmp.name
            prs.save(ppt_path)

            return ppt_path

        except Exception as e:
            print(f"Erreur PowerPoint: {str(e)}")
            raise
|
|
|
|
|
|
|
|
|
|
|
def read_file_safe(file, max_pdf_pages=10):
    """Extract the text of an uploaded file.

    The format is chosen from the file extension, compared
    case-insensitively:

    * ``.pdf``: text of the first *max_pdf_pages* pages via pdfplumber.
    * ``.docx`` / ``.doc``: non-empty paragraphs via python-docx.
      NOTE(review): python-docx is designed for .docx; a legacy binary .doc
      will most likely surface as a read error here. Confirm whether .doc
      should stay advertised.
    * anything else: read as text, trying UTF-8 (a BOM is stripped), then
      cp1252, then latin-1. latin-1 accepts every byte sequence, so it must
      come last; placed earlier it would hide cp1252.

    Args:
        file: ``None``, a path, or an object whose ``name`` attribute is a path.
        max_pdf_pages: Maximum number of PDF pages to read; ``None`` reads all.

    Returns:
        The extracted text, or ``None`` when *file* is ``None``. PDFs with no
        extractable text yield "Texte non extractible"; Word documents with
        no text yield "Document vide".

    Raises:
        ValueError: On any failure (missing file, parse error, I/O error).
            The original exception is chained as ``__cause__``.
    """
    if file is None:
        return None

    try:
        file_path = os.fspath(file.name if hasattr(file, 'name') else file)

        if not os.path.exists(file_path):
            raise ValueError("Fichier temporaire non trouvé")

        lowered_path = file_path.lower()

        if lowered_path.endswith('.pdf'):
            with pdfplumber.open(file_path) as pdf:
                page_texts = (
                    page.extract_text() for page in pdf.pages[:max_pdf_pages]
                )
                text = "".join(f"{page_text}\n" for page_text in page_texts if page_text)
            return text.strip() or "Texte non extractible"

        if lowered_path.endswith(('.docx', '.doc')):
            doc = docx.Document(file_path)
            paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
            return "\n".join(paragraphs) or "Document vide"

        # Plain text: strict decoders first.
        for enc in ('utf-8-sig', 'cp1252'):
            try:
                with open(file_path, 'r', encoding=enc) as f:
                    return f.read()
            except UnicodeDecodeError:
                continue

        # latin-1 maps every byte to a character, so this cannot fail to decode.
        with open(file_path, 'r', encoding='latin-1') as f:
            return f.read()

    except Exception as e:
        raise ValueError(f"Erreur de lecture: {str(e)}") from e
|
|
|
|
|
|
|
|
|
|
|
# Pipeline components shared by generate_presentation(); built once when the
# module is imported.
analyzer: DocumentAnalyzer = DocumentAnalyzer()
planner: AdaptivePlanner = AdaptivePlanner()
designer: SlideDesigner = SlideDesigner()
|
|
|
def generate_presentation(file, access_key):
    """Turn an uploaded document into a PowerPoint file (UI callback).

    Steps: check the access key, read the file, detect the document type and
    its sections, build the slide plan, then render the .pptx.

    Args:
        file: Uploaded file as accepted by ``read_file_safe`` (or ``None``).
        access_key: Key typed by the user; must equal ``ACCESS_KEY``.

    Returns:
        A ``(status_message, pptx_path)`` tuple. ``pptx_path`` is ``None``
        whenever no presentation was produced (wrong key, missing file,
        document shorter than 50 characters, or any error). Exceptions are
        never propagated: the traceback is printed and the error text is
        returned in the status message.
    """
    # Local import keeps this function self-contained.
    import hmac

    try:
        # Constant-time comparison so the response time does not reveal how
        # much of the key matched. Keys are compared as bytes because
        # compare_digest only accepts ASCII-only str arguments.
        if not isinstance(access_key, str) or not hmac.compare_digest(
            access_key.encode("utf-8"), ACCESS_KEY.encode("utf-8")
        ):
            return "Clé d'accès incorrecte", None

        if file is None:
            return "Veuillez télécharger un fichier", None

        text = read_file_safe(file)

        # Also rejects the short placeholder strings read_file_safe returns
        # for documents without extractable text.
        if not text or len(text.strip()) < 50:
            return "Document trop court ou vide", None

        doc_type = analyzer.detect_document_type(text)
        sections = analyzer.detect_sections(text)

        slides = planner.build_plan(doc_type, sections)

        # The detected document type doubles as the cover-slide title.
        ppt_path = designer.generate_presentation(doc_type, slides)

        return f"✅ Présentation générée avec succès! ({doc_type})", ppt_path

    except Exception as e:
        print(traceback.format_exc())
        return f"❌ Erreur: {str(e)}", None
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: inputs in the left column, results in the right column.
with gr.Blocks(title="SmartSlideAI", theme="soft") as interface:
    # Page header.
    gr.Markdown("""
    # 🤖 SmartSlideAI

    ### Transformez vos documents en présentations PowerPoint instantanément

    **Formats supportés:** PDF, DOCX, TXT
    """)

    with gr.Row():
        # Left column: document upload, access key and trigger button.
        with gr.Column(scale=1):
            file_input = gr.File(
                label="📄 Télécharger votre document",
                file_types=[".txt", ".pdf", ".docx", ".doc"],
                height=150,
            )
            key_input = gr.Textbox(
                label="🔑 Clé d'accès",
                type="password",
                placeholder="Entrez votre clé",
            )
            generate_btn = gr.Button("🚀 Générer la présentation", variant="primary")

        # Right column: status message and the generated file.
        with gr.Column(scale=1):
            status_output = gr.Textbox(
                label="📊 Statut",
                interactive=False,
                lines=2,
            )
            file_output = gr.File(
                label="📥 Télécharger la présentation",
                interactive=False,
            )

    # Clicking the button runs the pipeline and fills both outputs.
    generate_btn.click(
        fn=generate_presentation,
        inputs=[file_input, key_input],
        outputs=[status_output, file_output],
    )

    # Usage instructions shown below the form.
    gr.Markdown("""
    ---
    ### 📝 Instructions
    1. Téléchargez votre document (PDF, DOCX ou TXT)
    2. Entrez votre clé d'accès
    3. Cliquez sur "Générer"
    4. Téléchargez votre présentation PowerPoint
    """)
|
|
|
|
|
if __name__ == "__main__":
    # Start the web server only when the file is run as a script:
    # all network interfaces, port 7860, no public share link, no debug mode.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": False,
    }
    interface.launch(**launch_options)