File size: 5,092 Bytes
cc9e5f0
 
 
b0e0069
cc9e5f0
 
b0e0069
 
 
 
 
b5d5f6a
cc9e5f0
b0e0069
cc9e5f0
b5d5f6a
 
 
 
 
 
cc9e5f0
b0e0069
 
 
 
cc9e5f0
b5d5f6a
 
 
 
cc9e5f0
 
b0e0069
b5d5f6a
b0e0069
cc9e5f0
 
b0e0069
cc9e5f0
b0e0069
cc9e5f0
 
b0e0069
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc9e5f0
b5d5f6a
 
 
 
 
 
 
 
 
 
b0e0069
b5d5f6a
 
b0e0069
b5d5f6a
b0e0069
cc9e5f0
b5d5f6a
cc9e5f0
b0e0069
b5d5f6a
 
b0e0069
b5d5f6a
 
b0e0069
b5d5f6a
 
b0e0069
b5d5f6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc9e5f0
b0e0069
 
b5d5f6a
 
 
 
 
 
cc9e5f0
b5d5f6a
 
cc9e5f0
 
b5d5f6a
 
b0e0069
b5d5f6a
 
 
b0e0069
 
b5d5f6a
cc9e5f0
b5d5f6a
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import fitz  # PyMuPDF
import easyocr
import whisper
import tempfile
import os
import uuid
import genanki
import docx
import yt_dlp
import csv
from transformers import pipeline
import streamlit as st

def process_pdf(path):
    """Extract the text of a PDF, using OCR for pages without a text layer.

    Each page is first read with PyMuPDF's ``get_text``. When that yields
    only whitespace, the page is rendered to a temporary PNG and passed to
    EasyOCR (English, CPU only).

    Args:
        path: Filesystem path of the PDF to read.

    Returns:
        The concatenated text of all pages, or ``""`` when the document
        cannot be opened (the error is reported through ``st.error``).
    """
    try:
        doc = fitz.open(path)
    except Exception as e:
        st.error(f"❌ Could not open PDF: {str(e)}")
        return ""

    parts = []
    reader = None  # built lazily: loading the OCR model is costly and often unnecessary
    try:
        for page in doc:
            page_text = page.get_text()
            if page_text.strip():
                parts.append(page_text)
                continue

            if reader is None:
                reader = easyocr.Reader(['en'], gpu=False)

            # Use the platform temp directory rather than a hard-coded /tmp,
            # and always delete the rendered image once OCR is done.
            img_path = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.png")
            try:
                page.get_pixmap().save(img_path)
                result = reader.readtext(img_path, detail=0)
            finally:
                if os.path.exists(img_path):
                    os.remove(img_path)
            parts.append("\n".join(result))
    finally:
        # Release the file handle even if rendering or OCR fails midway.
        doc.close()
    return "".join(parts)

def process_image(path):
    """Run EasyOCR (English, CPU only) on an image and return its text.

    Args:
        path: Location of the image handed to ``Reader.readtext``.

    Returns:
        The recognised text fragments joined with newlines.
    """
    ocr_engine = easyocr.Reader(['en'], gpu=False)
    fragments = ocr_engine.readtext(path, detail=0)
    recognised = "\n".join(fragments)
    return recognised

def process_audio(path):
    """Transcribe an audio file with the Whisper "base" model.

    Args:
        path: Location of the audio file passed to ``transcribe``.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    transcription = whisper.load_model("base").transcribe(path)
    return transcription["text"]

def process_text(path):
    """Return the text content of a ``.txt`` or ``.docx`` file.

    The extension is matched case-insensitively, so ``NOTES.TXT`` and
    ``Report.DOCX`` are handled the same way as their lowercase forms.

    Args:
        path: Filesystem path (string) of the document to read.

    Returns:
        The file contents for ``.txt`` (decoded as UTF-8), the paragraphs
        joined with newlines for ``.docx``, or ``""`` for any other extension.
    """
    lowered = path.lower()
    if lowered.endswith(".txt"):
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    if lowered.endswith(".docx"):
        document = docx.Document(path)
        return "\n".join(para.text for para in document.paragraphs)
    return ""

def process_youtube(url):
    """Download the audio track of a video with yt-dlp and transcribe it.

    The audio is fetched into a private temporary directory, converted to
    MP3 by the ``FFmpegExtractAudio`` post-processor, transcribed with
    ``process_audio`` and then removed together with the directory, so
    repeated calls do not accumulate files in the temp folder.

    Args:
        url: Address of the video passed to ``YoutubeDL.download``.

    Returns:
        The transcript returned by ``process_audio``.
    """
    with tempfile.TemporaryDirectory() as work_dir:
        stem = os.path.join(work_dir, uuid.uuid4().hex)
        ydl_opts = {
            'format': 'bestaudio/best',
            # Let yt-dlp fill in the real extension; the post-processor then
            # writes "<stem>.mp3" next to the downloaded file.
            'outtmpl': stem + '.%(ext)s',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
            'quiet': True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        # Transcribe before leaving the `with` block: the directory and
        # everything inside it are deleted on exit.
        return process_audio(stem + '.mp3')

def load_llm_swarm():
    """Instantiate every generation pipeline used for flashcard creation.

    Returns:
        A dict mapping the engine names ``"fast"``, ``"bio"``, ``"deep"``,
        ``"mistral"`` and ``"fallback"`` to ``text2text-generation``
        pipelines, created in that order.
    """
    # NOTE(review): BioGPT, Falcon and Mistral appear to be causal language
    # models - confirm they load under the "text2text-generation" task.
    # NOTE(review): verify that "mistralai/Mistral-7B-Instruct" is an existing
    # Hub repository id (published checkpoints may carry a version suffix).
    task = "text2text-generation"
    engine_specs = (
        ("fast", {"model": "google/flan-t5-small", "max_length": 64}),
        ("bio", {"model": "microsoft/BioGPT-Large", "tokenizer": "microsoft/BioGPT-Large"}),
        ("deep", {"model": "tiiuae/falcon-7b-instruct"}),
        ("mistral", {"model": "mistralai/Mistral-7B-Instruct"}),
        ("fallback", {"model": "MBZUAI/LaMini-Flan-T5-783M"}),
    )
    return {name: pipeline(task, **options) for name, options in engine_specs}

# Module-level registry of generation pipelines, built once when this module
# is imported; generate_flashcards looks engines up in this dict by name.
llm_swarm = load_llm_swarm()

def generate_flashcards(text, types=("Q&A",), max_cards=100):
    """Generate flashcards from ``text`` using the pipelines in ``llm_swarm``.

    The text is cut into 400-character chunks. For every chunk one prompt is
    built per requested card type (in the order Q&A, Cloze, MCQ, Reverse) and
    sent to a randomly chosen engine; if that engine raises, the
    ``"fallback"`` engine is used instead.

    Args:
        text: Source material to turn into cards.
        types: Collection of card types to produce; any of ``"Q&A"``,
            ``"Cloze"``, ``"MCQ"`` and ``"Reverse"``. ``None`` means the
            default ``("Q&A",)``.
        max_cards: Upper bound on the number of cards returned. Values
            below zero are treated as zero.

    Returns:
        A list of dicts with the keys ``"question"``, ``"answer"`` and
        ``"tag"``. Empty when there is no text, no type or no budget.
    """
    from random import choice

    if types is None:
        types = ("Q&A",)

    # Prompt wording per card type, in the order cards are emitted per chunk.
    templates = (
        ("Q&A", "Generate a question and answer:\n{}"),
        ("Cloze", "Make a cloze deletion from:\n{}"),
        ("MCQ", "Generate a multiple choice question:\n{}"),
        ("Reverse", "Generate a question and answer:\n{}"),
    )
    chunk_size = 400
    limit = max(max_cards, 0)

    # Cap the number of prompts (and therefore cards), not just the number of
    # chunks: with several card types the old chunk cap allowed up to four
    # times ``max_cards`` cards.
    jobs = []
    for start in range(0, len(text), chunk_size):
        if len(jobs) >= limit:
            break
        chunk = text[start:start + chunk_size]
        for tag, template in templates:
            if tag in types and len(jobs) < limit:
                jobs.append((tag, template.format(chunk)))

    if not jobs:
        return []

    engine_names = list(llm_swarm)  # hoisted: identical for every prompt
    cards = []
    for tag, prompt in jobs:
        engine = llm_swarm[choice(engine_names)]
        try:
            output = engine(prompt, max_length=128)[0]["generated_text"]
        except Exception:
            # Not a bare ``except``: Ctrl-C / SystemExit must still propagate.
            output = llm_swarm["fallback"](prompt, max_length=64)[0]["generated_text"]

        if tag in ("Q&A", "Reverse"):
            # Text before the first colon is taken as the question, the rest
            # as the answer; without a colon the whole output is the answer.
            question, colon, answer = output.partition(":")
            if not colon:
                question, answer = "Question", output
            if tag == "Reverse":
                question, answer = answer, question
            cards.append({"question": question.strip(), "answer": answer.strip(), "tag": tag})
        elif tag == "Cloze":
            cards.append({"question": output.strip(), "answer": "[...]", "tag": tag})
        elif tag == "MCQ":
            cards.append({"question": output.strip(), "answer": "Choose best option", "tag": tag})

    return cards

def export_to_csv(cards, filename="batanki_cards.csv"):
    """Write flashcards to a UTF-8 CSV file.

    The file starts with the header row ``Question, Answer, Type`` followed
    by one row per card, in input order.

    Args:
        cards: Iterable of dicts with the keys ``"question"``, ``"answer"``
            and ``"tag"``.
        filename: Destination path; an existing file is overwritten.
    """
    header = ["Question", "Answer", "Type"]
    with open(filename, "w", newline="", encoding="utf-8") as out:
        sheet = csv.writer(out)
        sheet.writerow(header)
        sheet.writerows(
            [entry["question"], entry["answer"], entry["tag"]] for entry in cards
        )

def export_to_apkg(cards, deck_name="BatAnkiDeck"):
    """Package flashcards into an Anki ``.apkg`` file using genanki.

    Args:
        cards: Iterable of dicts with the keys ``"question"`` and
            ``"answer"``; each one becomes a note in the deck.
        deck_name: Name of the deck; also used as the output file stem.

    Returns:
        The path of the written package, ``f"{deck_name}.apkg"``.
    """
    from random import randrange

    # A fresh random id per export, drawn from the range the genanki
    # documentation suggests. The former ``int(uuid.uuid4()) >> 64`` produced
    # an unsigned 64-bit value that could exceed the signed 64-bit INTEGER
    # range SQLite accepts when the package is written.
    deck_id = randrange(1 << 30, 1 << 31)

    model = genanki.Model(
        1607392319,  # fixed model id, kept stable across exports
        "BatAnkiModel",
        fields=[{"name": "Question"}, {"name": "Answer"}],
        templates=[{
            "name": "Card 1",
            "qfmt": "{{Question}}",
            "afmt": "{{FrontSide}}<hr id='answer'>{{Answer}}",
        }]
    )
    deck = genanki.Deck(deck_id, deck_name)
    for card in cards:
        note = genanki.Note(model=model, fields=[card["question"], card["answer"]])
        deck.add_note(note)
    output_path = f"{deck_name}.apkg"
    genanki.Package(deck).write_to_file(output_path)
    return output_path