Spaces:
Running
Running
Commit
·
4efdaca
1
Parent(s):
4daef9c
update: add UID in url
Browse files
app.py
CHANGED
|
@@ -322,11 +322,7 @@ def get_transcript_for_url(url: str) -> dict:
|
|
| 322 |
|
| 323 |
|
| 324 |
def get_initial_analysis(
|
| 325 |
-
transcript_processor: TranscriptProcessor,
|
| 326 |
-
cid,
|
| 327 |
-
rsid,
|
| 328 |
-
origin,
|
| 329 |
-
ct,
|
| 330 |
) -> str:
|
| 331 |
"""Perform initial analysis of the transcript using OpenAI."""
|
| 332 |
try:
|
|
@@ -339,7 +335,7 @@ def get_initial_analysis(
|
|
| 339 |
link_start = "https"
|
| 340 |
|
| 341 |
if ct == "si": # street interview
|
| 342 |
-
prompt = f"""This is a transcript for a street interview. Transcript: {transcript}
|
| 343 |
|
| 344 |
Your task is to analyze this street interview transcript and identify the final/best timestamps for each topic or question discussed. Here are the key rules:
|
| 345 |
|
|
@@ -366,32 +362,32 @@ Return format:
|
|
| 366 |
|
| 367 |
[Question Title]
|
| 368 |
Total takes: [X] (Include ONLY if content appears more than once)
|
| 369 |
-
- [Take 1. <div id='topic' style="display: inline"> 15s at 12:30 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{750}}&et={{765}})
|
| 370 |
-
- [Take 2. <div id='topic' style="display: inline"> 30s at 14:45 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{885}}&et={{915}})
|
| 371 |
...
|
| 372 |
-
- [Take X (Best). <div id='topic' style="display: inline"> 1m 10s at 16:20 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{980}}&et={{1050}})
|
| 373 |
|
| 374 |
URL formatting:
|
| 375 |
- Convert timestamps to seconds (e.g., 10:13 → 613)
|
| 376 |
-
- Format: {link_start}://[origin]/colab/[cid]/[rsid]?st=[start_seconds]&et=[end_seconds]
|
| 377 |
- Parameters after RSID must start with ? and subsequent parameters use &
|
| 378 |
|
| 379 |
Example:
|
| 380 |
1. Introduction
|
| 381 |
Total takes: 2
|
| 382 |
-
- [Take 1. <div id='topic' style="display: inline"> 10s at 09:45]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{585}}&et={{595}})
|
| 383 |
-
- [Take 1. <div id='topic' style="display: inline"> 20s at 25:45]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{1245}}&et={{1265}})
|
| 384 |
-
- [Take 3 (Best). <div id='topic' style="display: inline"> 5s at 10:13 </div>]({link_start}://roll.ai/colab/1234aq_12314/51234151?st=613&et=618)"""
|
| 385 |
else:
|
| 386 |
-
prompt = f"""Given the transcript {transcript}, analyze speakers' discussions to identify compelling social media clips. For each speaker, identify key topics that mention people, news, events, trends, or sources.
|
| 387 |
|
| 388 |
Format requirements:
|
| 389 |
|
| 390 |
1. SPEAKER FORMAT:
|
| 391 |
**Speaker Name**
|
| 392 |
-
1. [Topic title <div id='topic' style="display: inline"> 20s at 12:30 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{750}}&et={{770}})
|
| 393 |
-
2. [Topic title <div id='topic' style="display: inline"> 45s at 14:45 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{885}}&et={{930}})
|
| 394 |
-
3. [Topic title <div id='topic' style="display: inline"> 55s at 16:20 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{980}}&et={{1035}})
|
| 395 |
|
| 396 |
2. TIMESTAMP RULES:
|
| 397 |
- Start time (st): Must begin exactly when speaker starts discussing the specific topic
|
|
@@ -408,7 +404,7 @@ Format requirements:
|
|
| 408 |
- Speaker names: Use markdown bold (**Name**)
|
| 409 |
- Topic titles: First word capitalized, rest lowercase
|
| 410 |
- Each topic must be a clickable link with correct timestamp
|
| 411 |
-
- URL format: {link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{start_time_in_sec}}&et={{end_time_in_sec}}
|
| 412 |
|
| 413 |
4. TOPIC SELECTION:
|
| 414 |
- Prioritize engaging, viral-worthy content
|
|
@@ -514,11 +510,11 @@ If a user asks timestamps for a specific topic, find the start time and end time
|
|
| 514 |
If the user provides a link to the agenda, use the correct_speaker_name_with_url function to correct the speaker names based on the agenda.
|
| 515 |
If the user provides the correct call type, use the correct_call_type function to correct the call type. Call Type for street interviews is 'si'.
|
| 516 |
Answer format:
|
| 517 |
-
Topic: Heading [Timestamp: start_time - end_time]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{start_time_in_sec}}&et={{end_time_in_sec}}"').
|
| 518 |
|
| 519 |
For Example:
|
| 520 |
If the start time is 10:13 and end time is 10:18, the url will be:
|
| 521 |
-
{link_start}://roll.ai/colab/1234aq_12314/51234151?st=613&et=618
|
| 522 |
In the URL, make sure that after RSID there is ? and then rest of the fields are added via &.
|
| 523 |
"""
|
| 524 |
messages = [{"role": "system", "content": prompt}]
|
|
|
|
| 322 |
|
| 323 |
|
| 324 |
def get_initial_analysis(
|
| 325 |
+
transcript_processor: TranscriptProcessor, cid, rsid, origin, ct, uid
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
) -> str:
|
| 327 |
"""Perform initial analysis of the transcript using OpenAI."""
|
| 328 |
try:
|
|
|
|
| 335 |
link_start = "https"
|
| 336 |
|
| 337 |
if ct == "si": # street interview
|
| 338 |
+
prompt = f"""This is a transcript for a street interview. Transcript: {transcript} User ID UID: {uid}
|
| 339 |
|
| 340 |
Your task is to analyze this street interview transcript and identify the final/best timestamps for each topic or question discussed. Here are the key rules:
|
| 341 |
|
|
|
|
| 362 |
|
| 363 |
[Question Title]
|
| 364 |
Total takes: [X] (Include ONLY if content appears more than once)
|
| 365 |
+
- [Take 1. <div id='topic' style="display: inline"> 15s at 12:30 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{750}}&et={{765}}&uid={{uid}})
|
| 366 |
+
- [Take 2. <div id='topic' style="display: inline"> 30s at 14:45 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{885}}&et={{915}}&uid={{uid}})
|
| 367 |
...
|
| 368 |
+
- [Take X (Best). <div id='topic' style="display: inline"> 1m 10s at 16:20 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{980}}&et={{1050}}&uid={{uid}})
|
| 369 |
|
| 370 |
URL formatting:
|
| 371 |
- Convert timestamps to seconds (e.g., 10:13 → 613)
|
| 372 |
+
- Format: {link_start}://[origin]/collab/[cid]/[rsid]?st=[start_seconds]&et=[end_seconds]&uid=[unique_id]
|
| 373 |
- Parameters after RSID must start with ? and subsequent parameters use &
|
| 374 |
|
| 375 |
Example:
|
| 376 |
1. Introduction
|
| 377 |
Total takes: 3
|
| 378 |
+
- [Take 1. <div id='topic' style="display: inline"> 10s at 09:45 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{585}}&et={{595}}&uid={{uid}})
|
| 379 |
+
- [Take 2. <div id='topic' style="display: inline"> 20s at 25:45 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{1545}}&et={{1565}}&uid={{uid}})
|
| 380 |
+
- [Take 3 (Best). <div id='topic' style="display: inline"> 5s at 10:13 </div>]({link_start}://roll.ai/collab/1234aq_12314/51234151?st=613&et=618&uid=82314)"""
|
| 381 |
else:
|
| 382 |
+
prompt = f"""Given the transcript {transcript}, user id UID: {uid} analyze speakers' discussions to identify compelling social media clips. For each speaker, identify key topics that mention people, news, events, trends, or sources.
|
| 383 |
|
| 384 |
Format requirements:
|
| 385 |
|
| 386 |
1. SPEAKER FORMAT:
|
| 387 |
**Speaker Name**
|
| 388 |
+
1. [Topic title <div id='topic' style="display: inline"> 20s at 12:30 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{750}}&et={{770}}&uid={{uid}})
|
| 389 |
+
2. [Topic title <div id='topic' style="display: inline"> 45s at 14:45 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{885}}&et={{930}}&uid={{uid}})
|
| 390 |
+
3. [Topic title <div id='topic' style="display: inline"> 55s at 16:20 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{980}}&et={{1035}}&uid={{uid}})
|
| 391 |
|
| 392 |
2. TIMESTAMP RULES:
|
| 393 |
- Start time (st): Must begin exactly when speaker starts discussing the specific topic
|
|
|
|
| 404 |
- Speaker names: Use markdown bold (**Name**)
|
| 405 |
- Topic titles: First word capitalized, rest lowercase
|
| 406 |
- Each topic must be a clickable link with correct timestamp
|
| 407 |
+
- URL format: {link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{start_time_in_sec}}&et={{end_time_in_sec}}&uid={{uid}}
|
| 408 |
|
| 409 |
4. TOPIC SELECTION:
|
| 410 |
- Prioritize engaging, viral-worthy content
|
|
|
|
| 510 |
If the user provides a link to the agenda, use the correct_speaker_name_with_url function to correct the speaker names based on the agenda.
|
| 511 |
If the user provides the correct call type, use the correct_call_type function to correct the call type. Call Type for street interviews is 'si'.
|
| 512 |
Answer format:
|
| 513 |
+
Topic: Heading [Timestamp: start_time - end_time]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{start_time_in_sec}}&et={{end_time_in_sec}}&uid={{uid}}).
|
| 514 |
|
| 515 |
For Example:
|
| 516 |
If the start time is 10:13 and end time is 10:18, the url will be:
|
| 517 |
+
{link_start}://roll.ai/collab/1234aq_12314/51234151?st=613&et=618&uid=82314
|
| 518 |
In the URL, make sure that after RSID there is ? and then rest of the fields are added via &.
|
| 519 |
"""
|
| 520 |
messages = [{"role": "system", "content": prompt}]
|