# Scraped page header - Spaces status: Runtime error; File size: 1,980 Bytes; ref b4c7867.
import re
from gtts import gTTS
from src.backend.llm import llm_generate
from src.backend.rag import format_sources, context_block
def generate_report(topic: str, hits, extra_prompt: str):
    """Request a markdown study report about ``topic`` from the LLM.

    The prompt embeds the output of ``format_sources(hits)`` and
    ``context_block(hits)`` and tells the model to ground the report in those
    sources only, citing them like ``[S1]``.

    Args:
        topic: Subject of the report, interpolated into the prompt.
        hits: Retrieval results, passed as-is to ``format_sources`` and
            ``context_block``.
        extra_prompt: Additional instructions; rendered as ``(none)`` when
            falsy.

    Returns:
        Whatever ``llm_generate`` returns for the assembled prompt
        (``max_new_tokens=900``, ``temperature=0.25``).
    """
    extra = extra_prompt or "(none)"
    source_list = format_sources(hits)
    excerpts = context_block(hits)
    request = f"""
Write a markdown study report grounded ONLY in the sources.
Every non-trivial claim must include citations like [S1].
Topic: {topic}
Extra instructions: {extra}
Sources list:
{source_list}
Excerpts:
{excerpts}
Output:
# Report
## Key Concepts
## Detailed Notes
## Key Takeaways
"""
    return llm_generate(request, max_new_tokens=900, temperature=0.25)
def generate_quiz(topic: str, hits, extra_prompt: str):
    """Request a markdown quiz about ``topic`` from the LLM.

    The prompt asks for 8 questions (5 multiple choice, 3 short answer)
    followed by an answer key whose explanations cite sources like ``[S1]``.
    It embeds the output of ``format_sources(hits)`` and
    ``context_block(hits)``.

    Args:
        topic: Subject of the quiz, interpolated into the prompt.
        hits: Retrieval results, passed as-is to ``format_sources`` and
            ``context_block``.
        extra_prompt: Additional instructions; rendered as ``(none)`` when
            falsy.

    Returns:
        Whatever ``llm_generate`` returns for the assembled prompt
        (``max_new_tokens=900``, ``temperature=0.25``).
    """
    extra = extra_prompt or "(none)"
    source_list = format_sources(hits)
    excerpts = context_block(hits)
    request = f"""
Write a markdown quiz grounded ONLY in the sources.
Create 8 questions:
- 5 multiple choice
- 3 short answer
Then include an Answer Key with explanations.
Explanations must include citations like [S1].
Topic: {topic}
Extra instructions: {extra}
Sources list:
{source_list}
Excerpts:
{excerpts}
Output:
# Quiz
## Questions
## Answer Key
"""
    return llm_generate(request, max_new_tokens=900, temperature=0.25)
def generate_podcast_transcript(topic: str, hits, extra_prompt: str):
    """Request a two-speaker markdown podcast transcript from the LLM.

    The prompt asks for a dialogue between "Speaker 1" and "Speaker 2",
    grounded only in the supplied sources with citations like ``[S1]``, and
    ending with a Sources section. It embeds the output of
    ``format_sources(hits)`` and ``context_block(hits)``.

    Args:
        topic: Subject of the episode, interpolated into the prompt.
        hits: Retrieval results, passed as-is to ``format_sources`` and
            ``context_block``.
        extra_prompt: Additional instructions; rendered as ``(none)`` when
            falsy.

    Returns:
        Whatever ``llm_generate`` returns for the assembled prompt
        (``max_new_tokens=900``, ``temperature=0.3``).
    """
    extra = extra_prompt or "(none)"
    source_list = format_sources(hits)
    excerpts = context_block(hits)
    request = f"""
Write a markdown podcast transcript grounded ONLY in the sources.
Two speakers: Speaker 1 and Speaker 2.
Every non-trivial claim must include citations like [S1].
Topic: {topic}
Extra instructions: {extra}
Sources list:
{source_list}
Excerpts:
{excerpts}
Output:
# Podcast Transcript
**Speaker 1:** ...
**Speaker 2:** ...
End with Sources section.
"""
    return llm_generate(request, max_new_tokens=900, temperature=0.3)
# Citation markers such as [S1], plus grouped forms such as [S1, S2] or
# [S1-S3], which a single-marker pattern would leave in the spoken text.
_CITATION_RE = re.compile(r"\[\s*S\d+(?:\s*[,;\-]\s*S\d+)*\s*\]")


def _speech_text_from_markdown(transcript_md: str, max_chars: int) -> str:
    """Strip citation and markdown markers, then cap the text at ``max_chars``."""
    text = _CITATION_RE.sub("", transcript_md)
    text = re.sub(r"#+", "", text)  # heading markers
    text = re.sub(r"\*\*", "", text)  # bold markers
    text = re.sub(r"\s+", " ", text).strip()
    if len(text) <= max_chars:
        return text
    clipped = text[:max_chars]
    # Whitespace was collapsed above, so a space is the only word separator.
    # If the cut lands inside a word, back up to the previous space so the
    # audio does not end on a half-word; a single over-long word is hard-cut.
    if text[max_chars] != " ":
        head, sep, _ = clipped.rpartition(" ")
        if sep:
            clipped = head
    return clipped.rstrip()


def transcript_to_mp3(
    transcript_md: str,
    out_path: str,
    *,
    max_chars: int = 4500,
    lang: str = "en",
):
    """Convert a markdown transcript to speech and save it at ``out_path``.

    Citation markers (``[S1]``, ``[S1, S2]``, ...), ``#`` heading markers and
    ``**`` bold markers are removed, whitespace is collapsed to single spaces,
    and the result is limited to ``max_chars`` characters (cut at a word
    boundary when possible) before being handed to gTTS.

    Args:
        transcript_md: Markdown transcript text.
        out_path: Destination path passed to ``gTTS(...).save``.
        max_chars: Maximum number of characters sent to gTTS. Defaults to
            4500, the limit this function has always applied.
        lang: Language code passed to gTTS. Defaults to ``"en"``.
    """
    speech_text = _speech_text_from_markdown(transcript_md, max_chars)
    gTTS(text=speech_text, lang=lang).save(out_path)