|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr |
|
|
import PyPDF2 |
|
|
import re |
|
|
import json |
|
|
from typing import List, Dict |
|
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
import torch |
|
|
import tempfile |
|
|
import os |
|
|
|
|
|
|
|
|
# --- Model setup (runs once at import time) ---------------------------------
print("Loading models... This may take a minute on first run.")

# Small T5 checkpoint fine-tuned for highlight-based question generation
# ("<hl> answer <hl> context" prompting; see generate_questions below).
model_name = "valhalla/t5-small-qg-hl"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Inference-only: disable dropout and keep everything on CPU.
model.eval()
device = torch.device("cpu")
model.to(device)
|
|
|
|
|
def extract_key_phrases(text: str) -> List[str]:
    """Collect up to five candidate answer phrases from *text*.

    Candidates come from runs of capitalized words (proper-noun style)
    plus phrases matched by a few definitional/conceptual regexes.
    Duplicates are dropped while preserving first-seen order.
    """
    found: List[str] = []

    # Proper-noun style runs of capitalized words (keep at most three).
    found.extend(re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', text)[:3])

    # Definitional / conceptual phrasing; at most two hits per pattern.
    for rx in (
        r'(?:process|method|technique|approach|concept|theory|principle|system) of ([^,.]{10,50})',
        r'(?:known as|called|termed|referred to as) ([^,.]{5,40})',
        r'(?:is|are|was|were) (\w+(?:\s+\w+){1,4}) (?:that|which|who)',
    ):
        found.extend(re.findall(rx, text, re.IGNORECASE)[:2])

    # Trim, discard short fragments, de-duplicate (order kept), cap at five.
    cleaned = [phrase.strip() for phrase in found if len(phrase.strip()) > 5]
    return list(dict.fromkeys(cleaned))[:5]
|
|
|
|
|
def generate_questions(context: str, answer: str, question_type: str = "what", max_length: int = 128) -> str:
    """Generate one question about *answer*, highlighted inside *context*.

    Uses the module-level T5 QG model; the answer span is wrapped in the
    <hl> markers the checkpoint was trained on. Returns "" when generation
    fails or produces something too short to be useful.
    """
    try:
        # Prompt format expected by the highlight-based QG checkpoint.
        prompt = f"generate question: <hl> {answer} <hl> {context}"

        encoded = tokenizer(
            prompt,
            return_tensors="pt",
            max_length=512,
            truncation=True,
            padding=True,
        ).to(device)

        # "what" questions decode slightly more conservatively than
        # the why/how variants.
        if question_type == "what":
            temperature, num_beams = 0.7, 4
        else:
            temperature, num_beams = 0.85, 5

        with torch.no_grad():
            generated = model.generate(
                **encoded,
                max_length=max_length,
                num_beams=num_beams,
                early_stopping=True,
                do_sample=True,
                temperature=temperature,
            )

        text = tokenizer.decode(generated[0], skip_special_tokens=True)

        # Strip any "question:"/"q:" prefix the model may emit.
        text = re.sub(r'^(question:|q:)', '', text, flags=re.IGNORECASE).strip()

        # Post-process for punctuation/capitalization and why/how phrasing.
        text = improve_question(text, answer, context, question_type)

        return text if len(text) > 10 else ""

    except Exception as e:
        print(f"Error generating question: {e}")
        return ""
|
|
|
|
|
def improve_question(question: str, answer: str, context: str, question_type: str) -> str:
    """Normalize punctuation/capitalization and nudge toward why/how form.

    Ensures a trailing '?', capitalizes the first letter, and — when the
    requested type is "why" or "how" but the question does not start that
    way — delegates to the corresponding rewrite helper.
    """
    # Guarantee a trailing question mark (replacing a trailing period).
    if not question.endswith('?'):
        question = question.rstrip('.') + '?'

    # Capitalize the first character.
    if question:
        question = question[0].upper() + question[1:]

    lowered = question.lower()
    if question_type == "why" and not lowered.startswith("why"):
        # Only rewrite questions that contain a verb we can pivot on.
        if re.search(r'\b(is|are|was|were|does|do|did)\b', question, re.IGNORECASE):
            question = create_why_question(question, answer, context)
    elif question_type == "how" and not lowered.startswith("how"):
        if re.search(r'\b(does|do|did|can|could)\b', question, re.IGNORECASE):
            question = create_how_question(question, answer, context)

    return question
|
|
|
|
|
def create_why_question(base_question: str, answer: str, context: str) -> str:
    """Try to produce a 'Why ...?' question from causal language in *context*.

    Falls back to an answer-derived template, then to *base_question*.
    """
    # Cause/effect phrasings that suggest a "why" question is answerable.
    causal_patterns = (
        r'because ([^,.]{10,60})',
        r'due to ([^,.]{10,60})',
        r'as a result of ([^,.]{10,60})',
        r'(?:leads to|causes|results in) ([^,.]{10,60})',
        r'in order to ([^,.]{10,60})',
    )

    for rx in causal_patterns:
        if re.search(rx, context, re.IGNORECASE):
            # Phrase the question around the sentence subject: a capitalized
            # word plus up to three following lowercase words before a verb.
            subj = re.search(r'([A-Z][a-z]+(?:\s+[a-z]+){0,3})\s+(?:is|are|was|were|does|do)', context)
            if subj:
                return f"Why does {subj.group(1).lower()} occur?"

    # Fallback: build a question from the first few words of the answer.
    answer_words = answer.split()
    if len(answer_words) > 3:
        return f"Why is {' '.join(answer_words[:4])}... important?"

    return base_question
|
|
|
|
|
def create_how_question(base_question: str, answer: str, context: str) -> str:
    """Try to produce a 'How ...?' question from process language in *context*.

    Falls back to a subject+verb template, then to *base_question*.
    """
    process_patterns = (
        r'(process|method|procedure|technique|approach) (?:of|for|to) ([^,.]{10,60})',
        r'by ([^,.]{10,60})',
        r'through ([^,.]{10,60})',
    )

    for rx in process_patterns:
        hit = re.search(rx, context, re.IGNORECASE)
        if hit is None:
            continue
        groups = hit.groups()
        if len(groups) > 1:
            # First pattern: the second group is the process description.
            return f"How does {groups[1].lower()} work?"
        # "by"/"through" patterns: the single group is the mechanism.
        return f"How is {groups[0].lower()} achieved?"

    # Fallback: pair an action verb from the context with its subject.
    action_verbs = re.findall(r'\b(works?|functions?|operates?|performs?|executes?)\b', context, re.IGNORECASE)
    if action_verbs:
        subj = re.search(r'([A-Z][a-z]+(?:\s+[a-z]+){0,3})\s+' + action_verbs[0], context, re.IGNORECASE)
        if subj:
            return f"How does {subj.group(1).lower()} {action_verbs[0].lower()}?"

    return base_question
|
|
|
|
|
def extract_text_from_pdf(pdf_file) -> str:
    """Extract all page text from an uploaded PDF.

    Accepts either a filesystem path (str) or a binary file-like object —
    PyPDF2.PdfReader handles both, so no branching is needed (the previous
    isinstance(str) branch executed identical code on both sides).

    Returns the concatenated page text, or an "Error reading PDF: ..."
    string on failure (callers detect errors via str.startswith("Error")).
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)

        # Collect per-page text and join once instead of repeated string
        # concatenation; pages with no extractable text are skipped.
        pages = []
        for page in pdf_reader.pages:
            page_text = page.extract_text()
            if page_text:
                pages.append(page_text + "\n")
        return "".join(pages)
    except Exception as e:
        # Error is signalled in-band as a string, matching existing callers.
        return f"Error reading PDF: {str(e)}"
|
|
|
|
|
def clean_text(text: str) -> str:
    """Collapse whitespace runs and strip characters outside a safe subset.

    Keeps word characters, whitespace, and basic punctuation (.,;!?-).
    """
    collapsed = re.sub(r'\s+', ' ', text)
    filtered = re.sub(r'[^\w\s.,;!?-]', '', collapsed)
    return filtered.strip()
|
|
|
|
|
def chunk_text(text: str, max_chunk_size: int = 512, overlap: int = 50) -> List[str]:
    """Split *text* into sentence-aligned chunks with a little overlap.

    Sentences are packed greedily until the size limit, then each chunk
    after the first is prefixed with the tail of its predecessor (its last
    two '. '-separated sentences, or its last *overlap* characters).
    """
    pieces = re.split(r'(?<=[.!?])\s+', text)

    raw_chunks: List[str] = []
    buffer = ""
    for piece in pieces:
        # The running buffer carries a leading space which counts toward
        # the size check (strict '<'); it is stripped when flushed.
        if len(buffer) + len(piece) < max_chunk_size:
            buffer += " " + piece
        else:
            if buffer:
                raw_chunks.append(buffer.strip())
            buffer = piece
    if buffer:
        raw_chunks.append(buffer.strip())

    # Second pass: prepend overlap text from the previous chunk.
    result: List[str] = []
    for idx, chunk in enumerate(raw_chunks):
        if idx > 0 and overlap > 0:
            prior = raw_chunks[idx - 1].split('. ')
            if len(prior) > 1:
                tail = '. '.join(prior[-2:])
            else:
                tail = raw_chunks[idx - 1][-overlap:]
            chunk = tail + " " + chunk
        result.append(chunk)

    return result
|
|
|
|
|
def generate_qa_pairs(chunk: str, num_questions: int = 3) -> List[Dict[str, str]]:
    """Build up to *num_questions* flashcards from one text chunk.

    Each card is a dict with "question", "answer", "context" (chunk preview)
    and "type"; types rotate through what/why/how by candidate position.
    Returns [] for chunks too short to question.
    """
    cards: List[Dict[str, str]] = []

    # Chunks under ~20 words rarely yield sensible questions.
    if len(chunk.split()) < 20:
        return []

    try:
        # Candidate answers: extracted key phrases first, then the first
        # two reasonably long sentences as a fallback.
        phrases = extract_key_phrases(chunk)
        long_sentences = [s.strip() for s in chunk.split('. ') if len(s.strip()) > 20]
        candidates = phrases + long_sentences[:2]

        if len(candidates) < 1:
            return []

        type_cycle = ["what", "why", "how"]
        made = 0
        for idx, candidate in enumerate(candidates):
            if made >= num_questions:
                break

            # Very short candidates make degenerate questions.
            if len(candidate.split()) < 3:
                continue

            # Rotation keys off the candidate's position (skipped
            # candidates still advance the what/why/how cycle).
            q_type = type_cycle[idx % len(type_cycle)]

            question = generate_questions(chunk, candidate, question_type=q_type)

            # Reject empty questions and ones that merely echo the answer.
            if question and question != candidate:
                cards.append({
                    "question": question,
                    "answer": candidate,
                    "context": chunk[:200] + "..." if len(chunk) > 200 else chunk,
                    "type": q_type,
                })
                made += 1

    except Exception as e:
        print(f"Error generating QA: {e}")

    return cards
|
|
|
|
|
def process_pdf(pdf_file, questions_per_chunk: int = 3, max_chunks: int = 20):
    """Main processing pipeline (generator).

    Yields 4-tuples of (status, csv_text, json_text, display_markdown) so
    the Gradio UI streams progress while the PDF is processed.
    """
    if pdf_file is None:
        # BUG FIX: this function is a generator, so a plain `return value`
        # never reaches the UI — the message must be yielded, then return.
        yield "Please upload a PDF file.", "", "", "Your flashcards will appear here..."
        return

    try:
        # Gradio sliders may deliver floats; slicing/counting needs ints.
        questions_per_chunk = int(questions_per_chunk)
        max_chunks = int(max_chunks)

        yield "π Extracting text from PDF...", "", "", "Processing..."
        raw_text = extract_text_from_pdf(pdf_file)

        # extract_text_from_pdf signals failure via an "Error..." string.
        if raw_text.startswith("Error"):
            yield raw_text, "", "", "Error occurred"
            return

        if len(raw_text.strip()) < 100:
            yield "PDF appears to be empty or contains no extractable text.", "", "", "Error occurred"
            return

        yield "π§Ή Cleaning text...", "", "", "Processing..."
        cleaned_text = clean_text(raw_text)

        yield "βοΈ Chunking text into sections...", "", "", "Processing..."
        chunks = chunk_text(cleaned_text)[:max_chunks]

        all_flashcards = []
        total_chunks = len(chunks)
        for i, chunk in enumerate(chunks):
            yield f"π΄ Generating flashcards... ({i+1}/{total_chunks} chunks processed)", "", "", "Processing..."
            all_flashcards.extend(generate_qa_pairs(chunk, questions_per_chunk))

        if not all_flashcards:
            yield "Could not generate flashcards from this PDF. Try a PDF with more textual content.", "", "", "No flashcards generated"
            return

        # BUG FIX: the original status emoji here was mis-encoded into a
        # broken multi-line string literal (a syntax error); restored.
        yield "✅ Finalizing...", "", "", "Almost done..."

        display_text = format_flashcards_display(all_flashcards)

        json_output = json.dumps(all_flashcards, indent=2, ensure_ascii=False)

        # CSV with doubled quotes so Anki imports cleanly.
        csv_lines = ["Question,Answer,Type"]
        for card in all_flashcards:
            q = card['question'].replace('"', '""')
            a = card['answer'].replace('"', '""')
            t = card.get('type', 'what')
            csv_lines.append(f'"{q}","{a}","{t}"')
        csv_output = "\n".join(csv_lines)

        # Per-type counts for the final status line (same mojibake fix).
        types_count = {}
        for card in all_flashcards:
            t = card.get('type', 'what')
            types_count[t] = types_count.get(t, 0) + 1
        stats = f"✅ Done! Generated {len(all_flashcards)} flashcards ("
        stats += ", ".join(f"{count} {qtype}" for qtype, count in types_count.items()) + ")"

        yield stats, csv_output, json_output, display_text

    except Exception as e:
        error_msg = f"Error processing PDF: {str(e)}"
        print(error_msg)
        yield error_msg, "", "", error_msg
|
|
|
|
|
def format_flashcards_display(flashcards: List[Dict]) -> str:
    """Render flashcards as a Markdown document with a per-type breakdown."""
    out = [f"## π΄ Generated {len(flashcards)} Flashcards\n"]

    # Tally cards per question type for the summary line.
    tally: Dict[str, int] = {}
    for card in flashcards:
        kind = card.get('type', 'what')
        tally[kind] = tally.get(kind, 0) + 1

    breakdown = ', '.join(f'{count} {qtype.upper()}' for qtype, count in tally.items())
    out.append(f"**Breakdown:** {breakdown}\n")
    out.append("---\n")

    for number, card in enumerate(flashcards, 1):
        qtype = card.get('type', 'what').upper()
        if qtype == "WHAT":
            emoji = "β"
        elif qtype == "WHY":
            emoji = "π€"
        else:
            emoji = "π§"

        out.append(f"### {emoji} Card {number} - {qtype}")
        out.append(f"**Q:** {card['question']}")
        out.append(f"**A:** {card['answer']}")
        out.append(f"*Context: {card['context'][:100]}...*\n")
        out.append("---\n")

    return "\n".join(out)
|
|
|
|
|
def create_sample_flashcard():
    """Render a fixed three-card demo set (one what, one why, one how)."""
    demo_rows = [
        ("What is photosynthesis?",
         "Photosynthesis is the process by which plants convert sunlight into energy.",
         "Photosynthesis is the process by which plants convert sunlight into energy...",
         "what"),
        ("Why do plants need chlorophyll?",
         "Chlorophyll absorbs light energy needed for photosynthesis.",
         "Chlorophyll absorbs light energy needed for photosynthesis...",
         "why"),
        ("How do plants convert light into chemical energy?",
         "Through the process of photosynthesis in the chloroplasts.",
         "Through the process of photosynthesis in the chloroplasts...",
         "how"),
    ]
    samples = [
        {"question": question, "answer": answer, "context": context, "type": qtype}
        for question, answer, context, qtype in demo_rows
    ]
    return format_flashcards_display(samples)
|
|
|
|
|
|
|
|
# CSS injected into the Gradio app via gr.Blocks(css=custom_css).
# NOTE(review): the .flashcard-container / .question / .answer classes are
# not referenced by the generated Markdown in this file — presumably kept
# for future theming; confirm before removing.
custom_css = """
.flashcard-container {
    border: 2px solid #e0e0e0;
    border-radius: 10px;
    padding: 20px;
    margin: 10px 0;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
}
.question {
    font-size: 1.2em;
    font-weight: bold;
    margin-bottom: 10px;
}
.answer {
    font-size: 1em;
    opacity: 0.9;
}
"""
|
|
|
|
|
|
|
|
# --- Gradio UI layout and event wiring --------------------------------------
with gr.Blocks(css=custom_css, title="PDF to Flashcards") as demo:
    # App header and feature overview.
    gr.Markdown("""
    # π PDF to Flashcards Generator (Enhanced)

    Upload any PDF document and automatically generate study flashcards with **What, Why, and How** questions using AI.

    **β¨ New Features:**
    - π― Generates **What** questions (factual)
    - π€ Generates **Why** questions (reasoning)
    - π§ Generates **How** questions (process)
    - π Improved question quality and variety
    - π§ Better answer extraction

    **Core Features:**
    - π§ Uses local CPU-friendly AI (no GPU needed)
    - π Extracts text from any PDF
    - βοΈ Intelligently chunks content
    - π΄ Generates diverse question-answer pairs
    - πΎ Export to CSV (Anki-compatible) or JSON

    *Note: Processing is done entirely on CPU, so large PDFs may take a few minutes.*
    """)

    with gr.Row():
        # Left column: upload control and generation settings.
        with gr.Column(scale=1):
            pdf_input = gr.File(
                label="Upload PDF",
                file_types=[".pdf"],
                # "filepath" hands process_pdf a path string, not raw bytes.
                type="filepath"
            )

            with gr.Row():
                questions_per_chunk = gr.Slider(
                    minimum=1,
                    maximum=6,
                    value=3,
                    step=1,
                    label="Questions per section"
                )
                max_chunks = gr.Slider(
                    minimum=5,
                    maximum=50,
                    value=20,
                    step=5,
                    label="Max sections to process"
                )

            process_btn = gr.Button("π Generate Flashcards", variant="primary")

            gr.Markdown("""
            ### π‘ Tips:
            - Text-based PDFs work best (scanned images won't work)
            - Academic papers and articles work great
            - Adjust "Questions per section" for more variety
            - Higher questions per section = more Why/How questions
            """)

        # Right column: streaming status plus the rendered flashcards.
        with gr.Column(scale=2):
            status_text = gr.Textbox(
                label="Status",
                value="Ready to process PDF...",
                interactive=False
            )

            output_display = gr.Markdown(
                label="Generated Flashcards",
                value="Your flashcards will appear here..."
            )

    # Export panes: CSV (Anki-compatible) and raw JSON.
    with gr.Row():
        with gr.Column():
            csv_output = gr.Textbox(
                label="CSV Format (for Anki import)",
                lines=10,
                visible=True
            )
            gr.Markdown("*Copy the CSV content and save as `.csv` file to import into Anki*")

        with gr.Column():
            json_output = gr.Textbox(
                label="JSON Format",
                lines=10,
                visible=True
            )
            gr.Markdown("*Raw JSON data for custom applications*")

    # process_pdf is a generator, so these outputs update as it yields.
    process_btn.click(
        fn=process_pdf,
        inputs=[pdf_input, questions_per_chunk, max_chunks],
        outputs=[status_text, csv_output, json_output, output_display]
    )

    # Static example of the output format, rendered once at startup.
    gr.Markdown("---")
    gr.Markdown("### π― Example Output Format")
    gr.Markdown(create_sample_flashcard())
|
|
|
|
|
# Launch the Gradio server only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()