Hindi-Rag

Sleeping

App Files Files Community

pranavinani committed on Jul 12, 2025

Commit

e2e039a

1 Parent(s): 0ae63a7

fixed pdf not showing bug

Browse files

Files changed (2) hide show

Man Ki baat.txt +0 -24
app.py +97 -19

Man Ki baat.txt DELETED Viewed

@@ -1,24 +0,0 @@
-मन की बात - प्रधानमंत्री नरेंद्र मोदी के रेडियो कार्यक्रम से
-मेरे प्यारे देशवासियो, नमस्कार। आज फिर एक बार मन की बात के माध्यम से आप सभी से जुड़ने का अवसर मिला है।
-यह कार्यक्रम देश के कोने-कोने से आने वाली आवाजों को एक मंच प्रदान करता है। आज मैं कुछ ऐसी बातें आपके साथ साझा करना चाहता हूं जो हमारे समाज और देश के विकास से जुड़ी हुई हैं।
-शिक्षा का महत्व:
-शिक्षा हमारे जीवन की आधारशिला है। यह न केवल व्यक्तिगत विकास के लिए आवश्यक है बल्कि राष्ट्रीय प्रगति के लिए भी अत्यंत महत्वपूर्ण है। हमें अपने बच्चों को न केवल पुस्तकीय ज्ञान देना चाहिए बल्कि व्यावहारिक शिक्षा भी प्रदान करनी चाहिए।
-युवाओं की शक्ति:
-हमारे देश की सबसे बड़ी संपत्ति हमारे युवा हैं। वे नवाचार, उद्यमिता और तकनीकी प्रगति के क्षेत्र में अपनी छाप छोड़ रहे हैं। हमें उनका समर्थन करना चाहिए और उन्हें आगे बढ़ने के लिए प्रेरित करना चाहिए।
-पर्यावरण संरक्षण:
-पर्यावरण का संरक्षण आज के समय की सबसे बड़ी आवश्यकता है। हमें प्रकृति के साथ सामंजस्य बिठाकर विकास करना होगा। वृक्षारोपण, जल संरक्षण, और स्वच्छता के माध्यम से हम एक बेहतर भविष्य का निर्माण कर सकते हैं।
-सामाजिक एकता:
-हमारे देश की विविधता में एकता ही हमारी सबसे बड़ी ताकत है। हमें सभी धर्मों, जातियों और भाषाओं का सम्मान करते हुए एक साथ आगे बढ़ना चाहिए।
-तकनीकी प्रगति:
-डिजिटल इंडिया की दिशा में हमारे कदम तेजी से आगे बढ़ रहे हैं। तकनीक का उपयोग करके हम न केवल अपना जीवन आसान बना सकते हैं बल्कि राष्ट्रीय विकास में भी योगदान दे सकते हैं।
-अंत में, मैं आप सभी से कहना चाहूंगा कि हम सभी मिलकर एक नए भारत का निर्माण कर सकते हैं। एक ऐसा भारत जो आत्मनिर्भर हो, स्वच्छ हो, और जहां हर व्यक्ति को आगे बढ़ने का समान अवसर मिले।
-धन्यवाद। जय हिंद!

app.py CHANGED Viewed

@@ -433,7 +433,7 @@ def reset_session():
 # Book management functions
 def get_available_books():
-    """Get list of available books with their thumbnails and text files"""
     books = []
     try:
@@ -447,16 +447,16 @@ def get_available_books():
         else:
             thumbnail_files = []
-        # Get all text files from OCR directory
         if os.path.exists(ocr_dir):
-            text_files = [f for f in os.listdir(ocr_dir)
-                         if f.lower().endswith('.txt')]
         else:
-            text_files = []
-        # Create book entries
-        for text_file in text_files:
-            book_name = os.path.splitext(text_file)[0]
             # Look for matching thumbnail
             thumbnail_path = None
@@ -475,7 +475,7 @@ def get_available_books():
             books.append({
                 'name': book_name,
                 'display_name': book_name.replace('_', ' ').title(),
-                'text_file': os.path.join(ocr_dir, text_file),
                 'thumbnail': thumbnail_path
             })
@@ -516,19 +516,19 @@ def create_text_placeholder(book_name):
         print(f"Error creating placeholder: {str(e)}")
         return None
-def load_book_text(book_info):
-    """Load text content from a pre-existing book"""
     try:
-        with open(book_info['text_file'], 'r', encoding='utf-8') as file:
-            content = file.read()
-        if not content.strip():
-            return "Error: Empty text file"
-        return content
     except Exception as e:
-        return f"Error loading book text: {str(e)}"
 def process_selected_book(selected_book_name):
     """Process a pre-selected book"""
@@ -549,8 +549,8 @@ def process_selected_book(selected_book_name):
         if not selected_book:
             return "चुनी गई पुस्तक नहीं मिली।", "", "", gr.update(visible=False)
-        # Load text content
-        text_content = load_book_text(selected_book)
         if not text_content.strip() or "Error" in text_content:
             return text_content, "", "", gr.update(visible=False)
@@ -636,6 +636,80 @@ def handle_gallery_selection(evt: gr.SelectData):
     return "None"
 # Create Gradio interface
 def create_interface():
     """Create the Gradio interface"""
@@ -840,6 +914,10 @@ def create_interface():
 def main():
     """Main function to launch the application"""
     print("🚀 Starting Hindi RAG Voice Demo (Groq Whisper API Version)...")
     print("📋 Loading AI models (this may take a moment)...")
     # Pre-load models

 # Book management functions
 def get_available_books():
+    """Get list of available books with their thumbnails and PDF files"""
     books = []
     try:
         else:
             thumbnail_files = []
+        # Get all PDF files from OCR directory
         if os.path.exists(ocr_dir):
+            pdf_files = [f for f in os.listdir(ocr_dir)
+                         if f.lower().endswith('.pdf')]
         else:
+            pdf_files = []
+        # Create book entries for PDF files
+        for pdf_file in pdf_files:
+            book_name = os.path.splitext(pdf_file)[0]
             # Look for matching thumbnail
             thumbnail_path = None
             books.append({
                 'name': book_name,
                 'display_name': book_name.replace('_', ' ').title(),
+                'pdf_file': os.path.join(ocr_dir, pdf_file),
                 'thumbnail': thumbnail_path
             })
         print(f"Error creating placeholder: {str(e)}")
         return None
def load_book_pdf(book_info):
    """Load the text content of a pre-existing PDF book.

    Args:
        book_info: Book entry dict; only its ``'pdf_file'`` key (path to the
            PDF) is read here.

    Returns:
        The extracted text on success. On failure, a string that contains
        ``"Error"`` so callers that test ``"Error" in text_content`` can show
        it to the user instead of silently getting an empty message.
    """
    try:
        # Delegate the actual parsing to the file's existing PDF extractor.
        # NOTE(review): presumably returns a str (possibly an "Error..."
        # message of its own) -- confirm against extract_text_from_pdf.
        text_content = extract_text_from_pdf(book_info['pdf_file'])
    except Exception as e:
        # Covers a missing 'pdf_file' key as well as extractor failures.
        return f"Error loading PDF book: {str(e)}"

    # An empty or whitespace-only result (e.g. a PDF with no extractable text)
    # is reported explicitly rather than returned as a blank string.
    if not text_content or not text_content.strip():
        return "Error: No text could be extracted from PDF"

    return text_content
 def process_selected_book(selected_book_name):
     """Process a pre-selected book"""
         if not selected_book:
             return "चुनी गई पुस्तक नहीं मिली।", "", "", gr.update(visible=False)
+        # Load PDF content and extract text
+        text_content = load_book_pdf(selected_book)
         if not text_content.strip() or "Error" in text_content:
             return text_content, "", "", gr.update(visible=False)
     return "None"
# LFS file handling for Hugging Face Spaces

# Every Git LFS pointer file starts with this line (see the Git LFS spec).
_LFS_POINTER_PREFIX = b"version https://git-lfs.github.com/spec/v1"
# Pointer files are small text files; anything at or above this size is
# treated as real content without being opened.
_LFS_POINTER_MAX_BYTES = 1024


def _is_lfs_pointer(path):
    """Return True when *path* looks like a Git LFS pointer, not real content."""
    try:
        if os.path.getsize(path) >= _LFS_POINTER_MAX_BYTES:
            return False
        with open(path, 'rb') as fh:
            return fh.read(len(_LFS_POINTER_PREFIX)) == _LFS_POINTER_PREFIX
    except OSError:
        # Unreadable file: nothing sensible to pull, so do not flag it.
        return False


def _pull_lfs_files(directory, extensions, timeout):
    """Run ``git lfs pull`` for each LFS pointer in *directory* matching *extensions*.

    Args:
        directory: Directory to scan (not recursive).
        extensions: Tuple of lower-case suffixes (e.g. ``('.pdf',)``).
        timeout: Per-file timeout in seconds for the ``git lfs pull`` call.
    """
    # Imported here so the helper works even if the module does not import
    # subprocess at top level.
    import subprocess

    if not os.path.isdir(directory):
        return

    for file_name in sorted(os.listdir(directory)):
        if not file_name.lower().endswith(extensions):
            continue

        file_path = os.path.join(directory, file_name)
        if not os.path.isfile(file_path):
            continue

        if not _is_lfs_pointer(file_path):
            print(f"✅ {file_name} already downloaded ({os.path.getsize(file_path):,} bytes)")
            continue

        print(f"📁 {file_name} appears to be an LFS pointer, attempting download...")

        # Build the include pattern from the real location (relative to the
        # working directory git runs in) instead of a hard-coded folder name,
        # so it stays correct when the configured directory changes.
        include_pattern = os.path.relpath(file_path, os.getcwd()).replace(os.sep, '/')
        try:
            result = subprocess.run(
                ['git', 'lfs', 'pull', '--include', include_pattern],
                cwd=os.getcwd(),
                capture_output=True,
                text=True,
                timeout=timeout
            )
        except subprocess.TimeoutExpired:
            print(f"⏰ Timeout downloading {file_name}")
        except Exception as e:
            # Best-effort: e.g. git or git-lfs is not installed.
            print(f"❌ Error downloading {file_name}: {str(e)}")
        else:
            if result.returncode == 0:
                print(f"✅ Successfully downloaded {file_name}")
            else:
                print(f"⚠️ Could not download {file_name}: {result.stderr}")


def ensure_lfs_files_downloaded():
    """Ensure LFS files are downloaded in Hugging Face Spaces environment.

    When either the ``SPACE_ID`` or ``HUGGINGFACE_HUB_CACHE`` environment
    variable is set, scans ``CONFIG['OCR_BOOKS_DIR']`` for PDFs and
    ``CONFIG['BOOK_THUMBNAILS_DIR']`` for images, and attempts a
    ``git lfs pull`` for every file that is still an LFS pointer.

    This is best-effort: all failures are printed and never raised, so
    application start-up is not blocked. Returns ``None``.
    """
    try:
        # Check if we're in a Hugging Face Spaces environment
        if not (os.getenv('SPACE_ID') or os.getenv('HUGGINGFACE_HUB_CACHE')):
            return

        print("🔄 Detected Hugging Face Spaces environment, checking LFS files...")

        # Book PDFs get the longer timeout; thumbnails the shorter one.
        _pull_lfs_files(CONFIG['OCR_BOOKS_DIR'], ('.pdf',), timeout=60)
        _pull_lfs_files(
            CONFIG['BOOK_THUMBNAILS_DIR'],
            ('.png', '.jpg', '.jpeg', '.gif', '.bmp'),
            timeout=30
        )
    except Exception as e:
        print(f"⚠️ Error checking LFS files: {str(e)}")
 # Create Gradio interface
 def create_interface():
     """Create the Gradio interface"""
 def main():
     """Main function to launch the application"""
     print("🚀 Starting Hindi RAG Voice Demo (Groq Whisper API Version)...")
+    # Ensure LFS files are available (important for Hugging Face Spaces)
+    ensure_lfs_files_downloaded()
     print("📋 Loading AI models (this may take a moment)...")
     # Pre-load models