fix speaker recognize

This commit is contained in:
2026-03-04 17:02:59 +08:00
parent e085eeddbc
commit 2395c048ff
18 changed files with 652 additions and 5794 deletions

View File

@@ -1,7 +1,7 @@
{
"id": "ef828811-9fa8-407f-848e-6e2f6bd42dc4",
"id": "e563a14c-cd19-4ae0-a2c7-086be560671d",
"status": "TranscriptStatus.completed",
"audio_url": "https://cdn.assemblyai.com/upload/62a20aa11f3abe888d203e9115d588159830540aef97d1fe9a852686fc781c32/f4b9241c-6739-47c3-a1ba-433e3ca252c0",
"audio_url": "https://cdn.assemblyai.com/upload/0ce88635eed2a52796e504b46e1d19f866b91c176763a8587be0e71ffdded48f/e6322bf2-147c-4d50-8d92-c943a6354484",
"text": "To the moon and back. Ha ha. I jump on every planet and every star. Come on, let's go far. Explore the whole universe. Little Malabar, the partying sun. Bum bum bum bum bum bum bum bum. Hey, Lemur, can you hear that? Oh. What is that noise? We are going to find out where it's coming from. No, it's not coming from the moon. Oh. Over there. It's not coming from Mercury either. Wow. Then it's the sun that's making all that noise. And that heat. Can you hear? It sounds like it's coming from the inside of the sun. Shall we go? Yeehaw. Yay. Yoohoo. Yoo hoo. Yay. It's so hot in here. You want to come with us? How about that? Do you want to come with us? Come with us. But where? Here. To the Adam's party. A party inside the sun. Hey, Adams, Now I understand why it's so hot at your plate. You keep on dancing. Wow. It's piping hot. It's so hot that you can't turn into popcorn. These atoms are crazy. The more they mix, the more they produce heat. Whoa. Hey. Hee haw hee haw. Ha ha ha. Ah, sun. Heat is so good. I love it. Ha ha. The popcorn is pretty hot. Thank you, sun. The inside of our star. The sun is made of atoms which bump into each other, merge, and produce heat. Sam.",
"confidence": 0.94673383,
"audio_duration": 241,
@@ -9,9 +9,10 @@
"utterances": [
{
"speaker": "A",
"text": "To the moon and back.",
"text": "To the moon and back. Ha ha. I jump on every planet and every star.",
"start": 800,
"end": 2240,
"end": 7920,
"confidence": 0.9483724,
"words": [
{
"text": "To",
@@ -42,15 +43,7 @@
"start": 1880,
"end": 2240,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "Ha ha.",
"start": 2560,
"end": 3120,
"words": [
},
{
"text": "Ha",
"start": 2560,
@@ -62,15 +55,7 @@
"start": 2840,
"end": 3120,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "I jump on every planet and every star.",
"start": 4640,
"end": 7920,
"words": [
},
{
"text": "I",
"start": 4640,
@@ -126,6 +111,7 @@
"text": "Come on, let's go far.",
"start": 8240,
"end": 9920,
"confidence": 0.8421875,
"words": [
{
"text": "Come",
@@ -161,9 +147,10 @@
},
{
"speaker": "A",
"text": "Explore the whole universe.",
"text": "Explore the whole universe. Little Malabar, the partying sun. Bum bum bum bum bum bum bum bum. Hey, Lemur, can you hear that? Oh. What is that noise? We are going to find out where it's coming from. No, it's not coming from the moon.",
"start": 10320,
"end": 12400,
"end": 54540,
"confidence": 0.9830892,
"words": [
{
"text": "Explore",
@@ -188,15 +175,7 @@
"start": 11600,
"end": 12400,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "Little Malabar, the partying sun.",
"start": 12480,
"end": 15680,
"words": [
},
{
"text": "Little",
"start": 12480,
@@ -226,15 +205,7 @@
"start": 15280,
"end": 15680,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "Bum bum bum bum bum bum bum bum.",
"start": 16400,
"end": 21600,
"words": [
},
{
"text": "Bum",
"start": 16400,
@@ -282,15 +253,7 @@
"start": 21040,
"end": 21600,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "Hey, Lemur, can you hear that?",
"start": 21840,
"end": 24080,
"words": [
},
{
"text": "Hey,",
"start": 21840,
@@ -326,29 +289,13 @@
"start": 23760,
"end": 24080,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "Oh.",
"start": 27520,
"end": 28080,
"words": [
},
{
"text": "Oh.",
"start": 27520,
"end": 28080,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "What is that noise?",
"start": 31340,
"end": 32460,
"words": [
},
{
"text": "What",
"start": 31340,
@@ -372,15 +319,7 @@
"start": 31940,
"end": 32460,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "We are going to find out where it's coming from.",
"start": 36300,
"end": 38780,
"words": [
},
{
"text": "We",
"start": 36300,
@@ -440,15 +379,7 @@
"start": 38460,
"end": 38780,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "No, it's not coming from the moon.",
"start": 52460,
"end": 54540,
"words": [
},
{
"text": "No,",
"start": 52460,
@@ -498,6 +429,7 @@
"text": "Oh.",
"start": 55470,
"end": 55790,
"confidence": 0.7553711,
"words": [
{
"text": "Oh.",
@@ -509,9 +441,10 @@
},
{
"speaker": "A",
"text": "Over there.",
"text": "Over there. It's not coming from Mercury either. Wow. Then it's the sun that's making all that noise. And that heat. Can you hear? It sounds like it's coming from the inside of the sun. Shall we go?",
"start": 56110,
"end": 56830,
"end": 96490,
"confidence": 0.95934415,
"words": [
{
"text": "Over",
@@ -524,15 +457,7 @@
"start": 56470,
"end": 56830,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "It's not coming from Mercury either.",
"start": 65950,
"end": 68350,
"words": [
},
{
"text": "It's",
"start": 65950,
@@ -568,29 +493,13 @@
"start": 67870,
"end": 68350,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "Wow.",
"start": 77550,
"end": 78110,
"words": [
},
{
"text": "Wow.",
"start": 77550,
"end": 78110,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "Then it's the sun that's making all that noise.",
"start": 78350,
"end": 81230,
"words": [
},
{
"text": "Then",
"start": 78350,
@@ -644,15 +553,7 @@
"start": 80790,
"end": 81230,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "And that heat.",
"start": 81470,
"end": 82670,
"words": [
},
{
"text": "And",
"start": 81470,
@@ -670,15 +571,7 @@
"start": 82230,
"end": 82670,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "Can you hear?",
"start": 88650,
"end": 89210,
"words": [
},
{
"text": "Can",
"start": 88650,
@@ -696,15 +589,7 @@
"start": 88930,
"end": 89210,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "It sounds like it's coming from the inside of the sun.",
"start": 89370,
"end": 92010,
"words": [
},
{
"text": "It",
"start": 89370,
@@ -770,15 +655,7 @@
"start": 91730,
"end": 92010,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "Shall we go?",
"start": 95690,
"end": 96490,
"words": [
},
{
"text": "Shall",
"start": 95690,
@@ -801,52 +678,29 @@
},
{
"speaker": "B",
"text": "Yeehaw.",
"text": "Yeehaw. Yay. Yoohoo. Yoo hoo. Yay.",
"start": 97050,
"end": 97850,
"end": 105850,
"confidence": 0.8153483,
"words": [
{
"text": "Yeehaw.",
"start": 97050,
"end": 97850,
"speaker": "B"
}
]
},
{
"speaker": "B",
"text": "Yay.",
"start": 100250,
"end": 100890,
"words": [
},
{
"text": "Yay.",
"start": 100250,
"end": 100890,
"speaker": "B"
}
]
},
{
"speaker": "B",
"text": "Yoohoo.",
"start": 101850,
"end": 102570,
"words": [
},
{
"text": "Yoohoo.",
"start": 101850,
"end": 102570,
"speaker": "B"
}
]
},
{
"speaker": "B",
"text": "Yoo hoo.",
"start": 103450,
"end": 104250,
"words": [
},
{
"text": "Yoo",
"start": 103450,
@@ -858,15 +712,7 @@
"start": 103810,
"end": 104250,
"speaker": "B"
}
]
},
{
"speaker": "B",
"text": "Yay.",
"start": 105210,
"end": 105850,
"words": [
},
{
"text": "Yay.",
"start": 105210,
@@ -877,9 +723,10 @@
},
{
"speaker": "A",
"text": "It's so hot in here.",
"text": "It's so hot in here. You want to come with us? How about that? Do you want to come with us?",
"start": 114890,
"end": 116570,
"end": 121350,
"confidence": 0.9607592,
"words": [
{
"text": "It's",
@@ -910,15 +757,7 @@
"start": 116210,
"end": 116570,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "You want to come with us?",
"start": 117290,
"end": 118560,
"words": [
},
{
"text": "You",
"start": 117290,
@@ -954,15 +793,7 @@
"start": 118290,
"end": 118560,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "How about that?",
"start": 119110,
"end": 119830,
"words": [
},
{
"text": "How",
"start": 119110,
@@ -980,15 +811,7 @@
"start": 119510,
"end": 119830,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "Do you want to come with us?",
"start": 119830,
"end": 121350,
"words": [
},
{
"text": "Do",
"start": 119830,
@@ -1038,6 +861,7 @@
"text": "Come with us.",
"start": 121590,
"end": 122550,
"confidence": 0.9973958,
"words": [
{
"text": "Come",
@@ -1064,6 +888,7 @@
"text": "But where?",
"start": 123430,
"end": 124150,
"confidence": 0.9992676,
"words": [
{
"text": "But",
@@ -1084,6 +909,7 @@
"text": "Here.",
"start": 124550,
"end": 124950,
"confidence": 0.5415039,
"words": [
{
"text": "Here.",
@@ -1095,9 +921,10 @@
},
{
"speaker": "A",
"text": "To the Adam's party.",
"text": "To the Adam's party. A party inside the sun. Hey, Adams, Now I understand why it's so hot at your plate. You keep on dancing. Wow. It's piping hot. It's so hot that you can't turn into popcorn. These atoms are crazy. The more they mix, the more they produce heat. Whoa.",
"start": 125350,
"end": 127750,
"end": 197070,
"confidence": 0.9543778,
"words": [
{
"text": "To",
@@ -1122,15 +949,7 @@
"start": 127350,
"end": 127750,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "A party inside the sun.",
"start": 128310,
"end": 130630,
"words": [
},
{
"text": "A",
"start": 128310,
@@ -1160,15 +979,7 @@
"start": 130310,
"end": 130630,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "Hey, Adams, Now I understand why it's so hot at your plate.",
"start": 141190,
"end": 145750,
"words": [
},
{
"text": "Hey,",
"start": 141190,
@@ -1240,15 +1051,7 @@
"start": 145310,
"end": 145750,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "You keep on dancing.",
"start": 145750,
"end": 147110,
"words": [
},
{
"text": "You",
"start": 145750,
@@ -1272,29 +1075,13 @@
"start": 146550,
"end": 147110,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "Wow.",
"start": 149750,
"end": 150230,
"words": [
},
{
"text": "Wow.",
"start": 149750,
"end": 150230,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "It's piping hot.",
"start": 150530,
"end": 151730,
"words": [
},
{
"text": "It's",
"start": 150530,
@@ -1312,15 +1099,7 @@
"start": 151330,
"end": 151730,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "It's so hot that you can't turn into popcorn.",
"start": 160530,
"end": 163730,
"words": [
},
{
"text": "It's",
"start": 160530,
@@ -1374,15 +1153,7 @@
"start": 162930,
"end": 163730,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "These atoms are crazy.",
"start": 188350,
"end": 190270,
"words": [
},
{
"text": "These",
"start": 188350,
@@ -1406,15 +1177,7 @@
"start": 189430,
"end": 190270,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "The more they mix, the more they produce heat.",
"start": 190670,
"end": 193470,
"words": [
},
{
"text": "The",
"start": 190670,
@@ -1468,15 +1231,7 @@
"start": 192990,
"end": 193470,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "Whoa.",
"start": 196510,
"end": 197070,
"words": [
},
{
"text": "Whoa.",
"start": 196510,
@@ -1487,24 +1242,17 @@
},
{
"speaker": "B",
"text": "Hey.",
"text": "Hey. Hee haw hee haw. Ha ha ha.",
"start": 197310,
"end": 197790,
"end": 203550,
"confidence": 0.81481934,
"words": [
{
"text": "Hey.",
"start": 197310,
"end": 197790,
"speaker": "B"
}
]
},
{
"speaker": "B",
"text": "Hee haw",
"start": 198510,
"end": 199390,
"words": [
},
{
"text": "Hee",
"start": 198510,
@@ -1516,15 +1264,7 @@
"start": 198950,
"end": 199390,
"speaker": "B"
}
]
},
{
"speaker": "B",
"text": "hee haw.",
"start": 201310,
"end": 202110,
"words": [
},
{
"text": "hee",
"start": 201310,
@@ -1536,15 +1276,7 @@
"start": 201630,
"end": 202110,
"speaker": "B"
}
]
},
{
"speaker": "B",
"text": "Ha ha ha.",
"start": 202670,
"end": 203550,
"words": [
},
{
"text": "Ha",
"start": 202670,
@@ -1567,9 +1299,10 @@
},
{
"speaker": "A",
"text": "Ah, sun.",
"text": "Ah, sun. Heat is so good. I love it.",
"start": 209880,
"end": 210640,
"end": 213160,
"confidence": 0.9660102,
"words": [
{
"text": "Ah,",
@@ -1582,15 +1315,7 @@
"start": 210280,
"end": 210640,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "Heat is so good.",
"start": 210640,
"end": 211880,
"words": [
},
{
"text": "Heat",
"start": 210640,
@@ -1614,15 +1339,7 @@
"start": 211560,
"end": 211880,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "I love it.",
"start": 212120,
"end": 213160,
"words": [
},
{
"text": "I",
"start": 212120,
@@ -1648,6 +1365,7 @@
"text": "Ha ha.",
"start": 220040,
"end": 220680,
"confidence": 0.8276367,
"words": [
{
"text": "Ha",
@@ -1668,6 +1386,7 @@
"text": "The popcorn is pretty hot.",
"start": 220760,
"end": 222520,
"confidence": 0.90270996,
"words": [
{
"text": "The",
@@ -1706,6 +1425,7 @@
"text": "Thank you, sun.",
"start": 224280,
"end": 225240,
"confidence": 0.9963379,
"words": [
{
"text": "Thank",
@@ -1729,9 +1449,10 @@
},
{
"speaker": "A",
"text": "The inside of our star.",
"text": "The inside of our star. The sun is made of atoms which bump into each other, merge, and produce heat.",
"start": 227080,
"end": 228520,
"end": 235240,
"confidence": 0.9702759,
"words": [
{
"text": "The",
@@ -1762,15 +1483,7 @@
"start": 228200,
"end": 228520,
"speaker": "A"
}
]
},
{
"speaker": "A",
"text": "The sun is made of atoms which bump into each other, merge, and produce heat.",
"start": 228840,
"end": 235240,
"words": [
},
{
"text": "The",
"start": 228840,
@@ -1868,6 +1581,7 @@
"text": "Sam.",
"start": 236160,
"end": 240750,
"confidence": 0.48602295,
"words": [
{
"text": "Sam.",

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff