File size: 1,980 Bytes
b4c7867
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import re
from gtts import gTTS
from src.backend.llm import llm_generate
from src.backend.rag import format_sources, context_block

def generate_report(topic: str, hits, extra_prompt: str):
    """Request a source-grounded markdown study report about ``topic``.

    The prompt embeds the topic, the extra instructions (rendered as
    "(none)" when ``extra_prompt`` is falsy), the result of
    ``format_sources(hits)`` and the result of ``context_block(hits)``,
    followed by the section skeleton the model is asked to fill in.

    Returns whatever ``llm_generate`` returns for that prompt, called with
    ``max_new_tokens=900`` and ``temperature=0.25``.
    """
    # Resolve the interpolated pieces up front so the template below reads
    # as plain text with simple placeholders.
    extra = extra_prompt or "(none)"
    sources_listing = format_sources(hits)
    excerpts = context_block(hits)

    report_prompt = f"""
Write a markdown study report grounded ONLY in the sources.
Every non-trivial claim must include citations like [S1].

Topic: {topic}
Extra instructions: {extra}

Sources list:
{sources_listing}

Excerpts:
{excerpts}

Output:
# Report
## Key Concepts
## Detailed Notes
## Key Takeaways
"""
    return llm_generate(report_prompt, max_new_tokens=900, temperature=0.25)

def generate_quiz(topic: str, hits, extra_prompt: str):
    """Request a source-grounded markdown quiz about ``topic``.

    The prompt asks for 8 questions (5 multiple choice, 3 short answer) and
    an answer key whose explanations carry citations. It embeds the topic,
    the extra instructions (rendered as "(none)" when ``extra_prompt`` is
    falsy), the result of ``format_sources(hits)`` and the result of
    ``context_block(hits)``.

    Returns whatever ``llm_generate`` returns for that prompt, called with
    ``max_new_tokens=900`` and ``temperature=0.25``.
    """
    # Resolve the interpolated pieces up front so the template below reads
    # as plain text with simple placeholders.
    extra = extra_prompt or "(none)"
    sources_listing = format_sources(hits)
    excerpts = context_block(hits)

    quiz_prompt = f"""
Write a markdown quiz grounded ONLY in the sources.
Create 8 questions:
- 5 multiple choice
- 3 short answer
Then include an Answer Key with explanations.
Explanations must include citations like [S1].

Topic: {topic}
Extra instructions: {extra}

Sources list:
{sources_listing}

Excerpts:
{excerpts}

Output:
# Quiz
## Questions
## Answer Key
"""
    return llm_generate(quiz_prompt, max_new_tokens=900, temperature=0.25)

def generate_podcast_transcript(topic: str, hits, extra_prompt: str):
    """Request a source-grounded two-speaker podcast transcript on ``topic``.

    The prompt embeds the topic, the extra instructions (rendered as
    "(none)" when ``extra_prompt`` is falsy), the result of
    ``format_sources(hits)`` and the result of ``context_block(hits)``,
    followed by the speaker-line layout the model is asked to follow.

    Returns whatever ``llm_generate`` returns for that prompt, called with
    ``max_new_tokens=900`` and ``temperature=0.3``.
    """
    # Resolve the interpolated pieces up front so the template below reads
    # as plain text with simple placeholders.
    extra = extra_prompt or "(none)"
    sources_listing = format_sources(hits)
    excerpts = context_block(hits)

    transcript_prompt = f"""
Write a markdown podcast transcript grounded ONLY in the sources.
Two speakers: Speaker 1 and Speaker 2.
Every non-trivial claim must include citations like [S1].

Topic: {topic}
Extra instructions: {extra}

Sources list:
{sources_listing}

Excerpts:
{excerpts}

Output:
# Podcast Transcript
**Speaker 1:** ...
**Speaker 2:** ...
End with Sources section.
"""
    return llm_generate(transcript_prompt, max_new_tokens=900, temperature=0.3)

def transcript_to_mp3(
    transcript_md: str,
    out_path: str,
    *,
    max_chars: int = 4500,
    lang: str = "en",
) -> None:
    """Convert a markdown transcript to speech and save the audio file.

    The markdown is reduced to plain text before synthesis: citation
    markers, ``#`` characters and ``**`` bold markers are removed and all
    whitespace runs are collapsed to single spaces. The text is then
    limited to ``max_chars`` characters and passed to gTTS, which writes
    the audio to ``out_path``.

    Args:
        transcript_md: Transcript in markdown form.
        out_path: Destination path handed to ``gTTS(...).save``.
        max_chars: Upper bound on the number of characters synthesised.
        lang: Language code forwarded to gTTS.

    Raises:
        ValueError: If nothing speakable remains after cleanup/truncation.
    """
    # Drop citation markers, including grouped ones such as "[S1, S2]",
    # so the synthesiser does not read source labels aloud.
    text = re.sub(r"\[\s*S\d+(?:\s*[,;]\s*S\d+)*\s*\]", "", transcript_md)
    text = re.sub(r"#+", "", text)
    text = re.sub(r"\*\*", "", text)
    text = re.sub(r"\s+", " ", text).strip()

    if len(text) > max_chars:
        clipped = text[:max_chars]
        # If the cut landed inside a word, back up to the previous space so
        # the audio does not end on a half-spoken word. A single unbroken
        # run with no spaces keeps the hard cut.
        if not text[max_chars].isspace() and " " in clipped:
            clipped = clipped.rsplit(" ", 1)[0]
        text = clipped.rstrip()

    # Fail fast with a clear message rather than handing an empty string
    # to the synthesiser.
    if not text:
        raise ValueError(
            "transcript contains no speakable text after markdown cleanup"
        )

    gTTS(text=text, lang=lang).save(out_path)