Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -87,7 +87,7 @@ class DocumentRAG:
|
|
| 87 |
|
| 88 |
# Combine text for summary
|
| 89 |
combined_text = " ".join([doc.page_content for doc in documents])
|
| 90 |
-
self.document_summary = combined_text
|
| 91 |
|
| 92 |
# Create embeddings and initialize retrieval chain
|
| 93 |
embeddings = OpenAIEmbeddings(api_key=self.api_key)
|
|
@@ -109,8 +109,8 @@ class DocumentRAG:
|
|
| 109 |
except Exception as e:
|
| 110 |
return f"Error processing documents: {str(e)}"
|
| 111 |
|
| 112 |
-
def generate_summary(self, text
|
| 113 |
-
"""Generate a summary of the provided text
|
| 114 |
if not self.api_key:
|
| 115 |
return "API Key not set. Please set it in the environment variables."
|
| 116 |
try:
|
|
@@ -118,7 +118,7 @@ class DocumentRAG:
|
|
| 118 |
response = client.chat.completions.create(
|
| 119 |
model="gpt-4",
|
| 120 |
messages=[
|
| 121 |
-
{"role": "system", "content":
|
| 122 |
{"role": "user", "content": text[:4000]}
|
| 123 |
],
|
| 124 |
temperature=0.3
|
|
@@ -127,8 +127,8 @@ class DocumentRAG:
|
|
| 127 |
except Exception as e:
|
| 128 |
return f"Error generating summary: {str(e)}"
|
| 129 |
|
| 130 |
-
def create_podcast(self
|
| 131 |
-
"""Generate a podcast script and audio
|
| 132 |
if not self.document_summary:
|
| 133 |
return "Please process documents before generating a podcast.", None
|
| 134 |
|
|
@@ -142,7 +142,7 @@ class DocumentRAG:
|
|
| 142 |
script_response = client.chat.completions.create(
|
| 143 |
model="gpt-4",
|
| 144 |
messages=[
|
| 145 |
-
{"role": "system", "content":
|
| 146 |
{"role": "user", "content": f"""Based on the following document summary, create a 1-2 minute podcast script:
|
| 147 |
1. Clearly label the dialogue as 'Host 1:' and 'Host 2:'
|
| 148 |
2. Keep the content engaging and insightful.
|
|
@@ -157,20 +157,76 @@ class DocumentRAG:
|
|
| 157 |
if not script:
|
| 158 |
return "Error: Failed to generate podcast script.", None
|
| 159 |
|
| 160 |
-
#
|
| 161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
except Exception as e:
|
| 164 |
return f"Error generating podcast: {str(e)}", None
|
| 165 |
|
| 166 |
-
def
|
| 167 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
if not self.qa_chain:
|
| 169 |
return history + [("System", "Please process the documents first.")]
|
| 170 |
try:
|
| 171 |
-
preface =
|
| 172 |
-
Instruction: Respond in
|
| 173 |
-
If you cannot provide an answer, say:
|
| 174 |
"""
|
| 175 |
query = f"{preface}\nQuery: {question}"
|
| 176 |
|
|
@@ -187,7 +243,6 @@ class DocumentRAG:
|
|
| 187 |
except Exception as e:
|
| 188 |
return history + [("System", f"Error: {str(e)}")]
|
| 189 |
|
| 190 |
-
|
| 191 |
# Initialize RAG system in session state
|
| 192 |
if "rag_system" not in st.session_state:
|
| 193 |
st.session_state.rag_system = DocumentRAG()
|
|
@@ -307,4 +362,4 @@ if st.session_state.rag_system.document_summary:
|
|
| 307 |
else:
|
| 308 |
st.error(script)
|
| 309 |
else:
|
| 310 |
-
st.info("Please process documents and generate summaries before creating a podcast.")
|
|
|
|
| 87 |
|
| 88 |
# Combine text for summary
|
| 89 |
combined_text = " ".join([doc.page_content for doc in documents])
|
| 90 |
+
self.document_summary = self.generate_summary(combined_text)
|
| 91 |
|
| 92 |
# Create embeddings and initialize retrieval chain
|
| 93 |
embeddings = OpenAIEmbeddings(api_key=self.api_key)
|
|
|
|
| 109 |
except Exception as e:
|
| 110 |
return f"Error processing documents: {str(e)}"
|
| 111 |
|
| 112 |
+
def generate_summary(self, text):
|
| 113 |
+
"""Generate a summary of the provided text."""
|
| 114 |
if not self.api_key:
|
| 115 |
return "API Key not set. Please set it in the environment variables."
|
| 116 |
try:
|
|
|
|
| 118 |
response = client.chat.completions.create(
|
| 119 |
model="gpt-4",
|
| 120 |
messages=[
|
| 121 |
+
{"role": "system", "content": "Summarize the document content concisely and provide 3-5 key points for discussion."},
|
| 122 |
{"role": "user", "content": text[:4000]}
|
| 123 |
],
|
| 124 |
temperature=0.3
|
|
|
|
| 127 |
except Exception as e:
|
| 128 |
return f"Error generating summary: {str(e)}"
|
| 129 |
|
| 130 |
+
def create_podcast(self):
|
| 131 |
+
"""Generate a podcast script and audio based on the document summary."""
|
| 132 |
if not self.document_summary:
|
| 133 |
return "Please process documents before generating a podcast.", None
|
| 134 |
|
|
|
|
| 142 |
script_response = client.chat.completions.create(
|
| 143 |
model="gpt-4",
|
| 144 |
messages=[
|
| 145 |
+
{"role": "system", "content": "You are a professional podcast producer. Create a natural dialogue based on the provided document summary."},
|
| 146 |
{"role": "user", "content": f"""Based on the following document summary, create a 1-2 minute podcast script:
|
| 147 |
1. Clearly label the dialogue as 'Host 1:' and 'Host 2:'
|
| 148 |
2. Keep the content engaging and insightful.
|
|
|
|
| 157 |
if not script:
|
| 158 |
return "Error: Failed to generate podcast script.", None
|
| 159 |
|
| 160 |
+
# Convert script to audio
|
| 161 |
+
final_audio = AudioSegment.empty()
|
| 162 |
+
is_first_speaker = True
|
| 163 |
+
|
| 164 |
+
lines = [line.strip() for line in script.split("\n") if line.strip()]
|
| 165 |
+
for line in lines:
|
| 166 |
+
if ":" not in line:
|
| 167 |
+
continue
|
| 168 |
+
|
| 169 |
+
speaker, text = line.split(":", 1)
|
| 170 |
+
if not text.strip():
|
| 171 |
+
continue
|
| 172 |
+
|
| 173 |
+
try:
|
| 174 |
+
voice = "nova" if is_first_speaker else "onyx"
|
| 175 |
+
audio_response = client.audio.speech.create(
|
| 176 |
+
model="tts-1",
|
| 177 |
+
voice=voice,
|
| 178 |
+
input=text.strip()
|
| 179 |
+
)
|
| 180 |
+
|
| 181 |
+
temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
|
| 182 |
+
audio_response.stream_to_file(temp_audio_file.name)
|
| 183 |
+
|
| 184 |
+
segment = AudioSegment.from_file(temp_audio_file.name)
|
| 185 |
+
final_audio += segment
|
| 186 |
+
final_audio += AudioSegment.silent(duration=300)
|
| 187 |
+
|
| 188 |
+
is_first_speaker = not is_first_speaker
|
| 189 |
+
except Exception as e:
|
| 190 |
+
print(f"Error generating audio for line: {text}")
|
| 191 |
+
print(f"Details: {e}")
|
| 192 |
+
continue
|
| 193 |
+
|
| 194 |
+
if len(final_audio) == 0:
|
| 195 |
+
return "Error: No audio could be generated.", None
|
| 196 |
+
|
| 197 |
+
output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
|
| 198 |
+
final_audio.export(output_file, format="mp3")
|
| 199 |
+
return script, output_file
|
| 200 |
|
| 201 |
except Exception as e:
|
| 202 |
return f"Error generating podcast: {str(e)}", None
|
| 203 |
|
| 204 |
+
def generate_summary(self, text):
|
| 205 |
+
"""Generate a summary of the provided text."""
|
| 206 |
+
if not self.api_key:
|
| 207 |
+
return "API Key not set. Please set it in the environment variables."
|
| 208 |
+
try:
|
| 209 |
+
client = OpenAI(api_key=self.api_key)
|
| 210 |
+
response = client.chat.completions.create(
|
| 211 |
+
model="gpt-4",
|
| 212 |
+
messages=[
|
| 213 |
+
{"role": "system", "content": "Summarize the document content concisely and provide 3-5 key points for discussion."},
|
| 214 |
+
{"role": "user", "content": text[:4000]}
|
| 215 |
+
],
|
| 216 |
+
temperature=0.3
|
| 217 |
+
)
|
| 218 |
+
return response.choices[0].message.content
|
| 219 |
+
except Exception as e:
|
| 220 |
+
return f"Error generating summary: {str(e)}"
|
| 221 |
+
|
| 222 |
+
def handle_query(self, question, history):
|
| 223 |
+
"""Handle user queries."""
|
| 224 |
if not self.qa_chain:
|
| 225 |
return history + [("System", "Please process the documents first.")]
|
| 226 |
try:
|
| 227 |
+
preface = """
|
| 228 |
+
Instruction: Respond in English. Be professional and concise, keeping the response under 300 words.
|
| 229 |
+
If you cannot provide an answer, say: "I am not sure about this question. Please try asking something else."
|
| 230 |
"""
|
| 231 |
query = f"{preface}\nQuery: {question}"
|
| 232 |
|
|
|
|
| 243 |
except Exception as e:
|
| 244 |
return history + [("System", f"Error: {str(e)}")]
|
| 245 |
|
|
|
|
| 246 |
# Initialize RAG system in session state
|
| 247 |
if "rag_system" not in st.session_state:
|
| 248 |
st.session_state.rag_system = DocumentRAG()
|
|
|
|
| 362 |
else:
|
| 363 |
st.error(script)
|
| 364 |
else:
|
| 365 |
+
st.info("Please process documents and generate summaries before creating a podcast.")
|