Pavaas committed on
Commit
b0e0069
·
verified ·
1 Parent(s): f3c32ac

Update config.py

Browse files
Files changed (1) hide show
  1. config.py +90 -47
config.py CHANGED
@@ -2,84 +2,127 @@ import fitz # PyMuPDF
2
  import pytesseract
3
  import easyocr
4
  import whisper
5
- import speech_recognition as sr
6
- from transformers import pipeline
7
  import os
8
- from PIL import Image
9
- import genanki
10
  import uuid
11
- import tempfile
12
- import shutil
 
 
 
 
13
 
14
- # === OCR and Text Extraction ===
15
 
16
def process_pdf(file_path):
    """Extract text from a PDF, falling back to OCR for image-only pages.

    Args:
        file_path: Path to the PDF file.

    Returns:
        Concatenated text of all pages. Pages with no embedded text layer
        are rasterized and run through Tesseract OCR instead.
    """
    text = ""
    # Context manager ensures the document handle is closed even on error.
    with fitz.open(file_path) as doc:
        for page in doc:
            page_text = page.get_text()
            if page_text.strip():
                text += page_text
            else:
                # BUG FIX: the original tested the *cumulative* buffer
                # (`if not text.strip()`), so OCR stopped firing as soon as
                # any earlier page contributed text. Check each page's own
                # text instead.
                pix = page.get_pixmap()
                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                text += pytesseract.image_to_string(img)
    return text
26
 
27
def process_image(file_path):
    """OCR an image file and return the recognized text, one line per hit."""
    ocr = easyocr.Reader(['en'])
    lines = ocr.readtext(file_path, detail=0)
    return "\n".join(lines)
31
 
32
def process_audio(file_path):
    """Transcribe an audio file with Whisper's "base" model."""
    transcriber = whisper.load_model("base")
    transcription = transcriber.transcribe(file_path)
    return transcription["text"]
36
 
37
- # === AI Flashcard Generation ===
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
def generate_flashcards(text):
    """Turn raw text into up to 15 question/answer flashcard dicts.

    The text is processed in ~400-character chunks; each chunk yields one
    card via a T5 text2text pipeline. Output is split on the first ":" into
    question and answer when possible.
    """
    qa_model = pipeline("text2text-generation", model="t5-base", max_length=64)
    cards = []
    for start in range(0, len(text), 400):
        chunk = text[start:start + 400]
        prompt = f"Generate a flashcard question and answer from this medical text:\n{chunk}"
        generated = qa_model(prompt)[0]['generated_text']
        if ":" in generated:
            question, answer = generated.split(":", 1)
        else:
            question, answer = "Question", generated
        cards.append({"question": question.strip(), "answer": answer.strip()})
        if len(cards) >= 15:
            break
    return cards
55
 
56
- # === Export to Anki (.apkg) ===
 
 
57
 
58
def export_to_apkg(cards, deck_name):
    """Write Q/A cards to an Anki .apkg in the temp dir; return its path."""
    note_model = genanki.Model(
        1607392319,
        'BatAnkiModel',
        fields=[{'name': 'Question'}, {'name': 'Answer'}],
        templates=[
            {
                'name': 'Card 1',
                'qfmt': '{{Question}}',
                'afmt': '{{FrontSide}}<hr id="answer">{{Answer}}',
            },
        ],
    )
    # Random 10-digit deck id derived from a UUID.
    deck = genanki.Deck(int(str(uuid.uuid4().int)[:10]), deck_name)
    for entry in cards:
        note = genanki.Note(model=note_model, fields=[entry['question'], entry['answer']])
        deck.add_note(note)
    package_path = os.path.join(tempfile.gettempdir(), f"{deck_name}.apkg")
    genanki.Package(deck).write_to_file(package_path)
    return package_path
 
 
 
 
 
 
2
  import pytesseract
3
  import easyocr
4
  import whisper
5
+ import tempfile
 
6
  import os
 
 
7
  import uuid
8
+ import genanki
9
+ import docx
10
+ import yt_dlp
11
+ import csv
12
+ from transformers import pipeline
13
+ from PIL import Image
14
 
15
+ # === Extract Text From Sources ===
16
 
17
def process_pdf(path):
    """Extract text from a PDF, OCR-ing any page with no embedded text layer.

    Args:
        path: Path to the PDF file.

    Returns:
        Concatenated text of all pages; image-only pages are rendered to a
        bitmap and passed through Tesseract OCR.
    """
    text = ""
    # FIX: the original never closed the document; the context manager
    # releases the file handle even if rendering/OCR raises.
    with fitz.open(path) as doc:
        for page in doc:
            t = page.get_text()
            if t.strip():
                text += t
            else:
                # No selectable text on this page (likely scanned) — OCR it.
                pix = page.get_pixmap()
                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                text += pytesseract.image_to_string(img)
    return text
29
 
30
def process_image(path):
    """OCR an image with EasyOCR and return recognized text, newline-joined.

    The EasyOCR reader loads large detection/recognition models, so it is
    created once and cached on the function instead of being rebuilt on
    every call (the original reconstructed it per image).
    """
    reader = getattr(process_image, "_reader", None)
    if reader is None:
        reader = easyocr.Reader(['en'])
        process_image._reader = reader
    result = reader.readtext(path, detail=0)
    return "\n".join(result)
34
 
35
def process_audio(path):
    """Transcribe an audio file using Whisper's "base" model.

    The model checkpoint is loaded once and cached on the function —
    the original reloaded it on every call, which dominates runtime.
    """
    model = getattr(process_audio, "_model", None)
    if model is None:
        model = whisper.load_model("base")
        process_audio._model = model
    result = model.transcribe(path)
    return result["text"]
39
 
40
def process_text(path):
    """Read plain text from a .txt or .docx file.

    Args:
        path: Path to the document.

    Returns:
        The file's text, or "" for unsupported extensions.

    Extension matching is case-insensitive (the original silently returned
    "" for e.g. "NOTES.TXT").
    """
    lower = path.lower()
    if lower.endswith(".txt"):
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    if lower.endswith(".docx"):
        doc = docx.Document(path)
        return "\n".join(para.text for para in doc.paragraphs)
    return ""
48
+
49
def process_youtube(url):
    """Download a YouTube URL's audio track as MP3 and transcribe it.

    Args:
        url: Video URL understood by yt-dlp.

    Returns:
        Whisper transcription text of the downloaded audio.
    """
    temp_dir = tempfile.gettempdir()
    base = os.path.join(temp_dir, str(uuid.uuid4()))
    ydl_opts = {
        'format': 'bestaudio/best',
        # FIX: do not hard-code ".mp3" in outtmpl. FFmpegExtractAudio
        # replaces the downloaded file's extension, so a literal ".mp3"
        # template can leave the result at "<name>.mp3.mp3" or a
        # misnamed container. Use the %(ext)s template and compute the
        # final mp3 path ourselves.
        'outtmpl': base + ".%(ext)s",
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'quiet': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return process_audio(base + ".mp3")
65
 
66
+ # === Flashcard Generator ===
67
+
68
def generate_flashcards(text, model_name="t5-base", types=None):
    """Generate flashcards of one or more styles from free text.

    Args:
        text: Source material; processed in ~400-character chunks.
        model_name: Hugging Face model id for the text2text pipeline.
        types: Card styles to emit per chunk — any of "Q&A", "Cloze",
            "MCQ", "Reverse". Defaults to ["Q&A"].

    Returns:
        List of {"question", "answer", "tag"} dicts, capped at 20 cards.
    """
    # FIX: the original used a mutable default (types=["Q&A"]), which is
    # shared across calls and mutable by callers. Use None as sentinel.
    if types is None:
        types = ["Q&A"]

    generator = pipeline("text2text-generation", model=model_name, max_length=64)
    chunks = [text[i:i + 400] for i in range(0, len(text), 400)]
    cards = []

    def _question_answer(chunk):
        # One generation shared by "Q&A" and "Reverse" — the original ran
        # the identical prompt twice per chunk.
        out = generator(f"Generate a question and answer from:\n{chunk}")[0]['generated_text']
        return out.split(":", 1) if ":" in out else ("Question", out)

    for chunk in chunks:
        if "Q&A" in types or "Reverse" in types:
            q, a = _question_answer(chunk)
            if "Q&A" in types:
                cards.append({"question": q.strip(), "answer": a.strip(), "tag": "Q&A"})
            if "Reverse" in types:
                cards.append({"question": a.strip(), "answer": q.strip(), "tag": "Reverse"})

        if "Cloze" in types:
            cloze = generator(f"Create a cloze deletion flashcard from:\n{chunk}")[0]['generated_text']
            cards.append({"question": cloze.strip(), "answer": "[...]", "tag": "Cloze"})

        if "MCQ" in types:
            mcq = generator(f"Generate a multiple choice question from:\n{chunk}")[0]['generated_text']
            cards.append({"question": mcq.strip(), "answer": "Choose best option", "tag": "MCQ"})

        if len(cards) >= 20:
            break

    return cards
96
+
97
+ # === Exporters ===
98
 
99
def export_to_apkg(cards, deck_name):
    """Package cards ({"question", "answer", "tag"} dicts) into an Anki deck.

    Returns the path of the generated .apkg file in the temp directory.
    """
    anki_model = genanki.Model(
        1607392319,
        'BatAnkiModel',
        fields=[{'name': 'Question'}, {'name': 'Answer'}, {'name': 'Tag'}],
        templates=[{
            'name': 'Card 1',
            'qfmt': '{{Question}}<br><i>Tag: {{Tag}}</i>',
            'afmt': '{{FrontSide}}<hr id="answer">{{Answer}}',
        }],
    )
    # Random 10-digit deck id derived from a UUID.
    deck = genanki.Deck(int(str(uuid.uuid4().int)[:10]), deck_name)
    for entry in cards:
        deck.add_note(genanki.Note(
            model=anki_model,
            fields=[entry['question'], entry['answer'], entry.get('tag', "")],
        ))
    pkg_path = os.path.join(tempfile.gettempdir(), f"{deck_name}.apkg")
    genanki.Package(deck).write_to_file(pkg_path)
    return pkg_path
120
 
121
def export_to_csv(cards, deck_name):
    """Dump cards to a CSV (Question, Answer, Tag) in the temp directory.

    Returns the path of the written file.
    """
    out_path = os.path.join(tempfile.gettempdir(), f"{deck_name}.csv")
    with open(out_path, "w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        out.writerow(["Question", "Answer", "Tag"])
        out.writerows(
            [entry["question"], entry["answer"], entry.get("tag", "")]
            for entry in cards
        )
    return out_path