// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
import {i18n} from "../i18n.js";
import {assertExists} from "../utils/assert.js";
import {z} from "../utils/schema.js";
import {getWordCount, lazyInit, sliceWhen} from "../utils/utils.js";

import {LanguageCode} from "./language_info.js";

/** Schema of a time span with `startMs` / `endMs` numeric bounds. */
export const timeRangeSchema = z.object({
  startMs: z.number(),
  endMs: z.number(),
});

/**
 * Schema of one piece of recognized text.
 *
 * `partial` is either absent or literally `true`; it is set on tokens that
 * come from a partial (not yet final) recognition result.
 */
export const textPartSchema = z.object({
  kind: z.literal("textPart"),
  text: z.string(),
  timeRange: z.nullable(timeRangeSchema),
  leadingSpace: z.nullable(z.boolean()),
  speakerLabel: z.autoNullOptional(z.string()),
  partial: z.optional(z.literal(true)),
});

/** Schema of the marker token placed between two recognition results. */
export const textSeparatorSchema = z.object({
  kind: z.literal("textSeparator"),
});

/** The single separator token instance pushed between results. */
export const textSeparator = {kind: "textSeparator"};

/** Schema of any token: either a text part or a separator. */
export const textTokenSchema = z.union([
  textPartSchema,
  textSeparatorSchema,
]);

/**
 * Converts a time delta to milliseconds.
 *
 * @param timeDelta Object carrying a `microseconds` field, or null.
 * @return The value in milliseconds, or null when `timeDelta` is null.
 */
function toMs(timeDelta) {
  return timeDelta === null ? null : Number(timeDelta.microseconds) / 1e3;
}

/**
 * Converts the timing info of an event into a time range shifted by
 * `offsetMs`.
 *
 * @param timingInfo Object with `audioStartTime` and `eventEndTime`, or null.
 * @param offsetMs Offset in milliseconds added to both bounds.
 * @return `{startMs, endMs}`, or null when `timingInfo` is null.
 */
function parseTimingInfo(timingInfo, offsetMs) {
  if (timingInfo === null) {
    return null;
  }
  const begin = toMs(timingInfo.audioStartTime);
  const finish = toMs(timingInfo.eventEndTime);
  return {startMs: begin + offsetMs, endMs: finish + offsetMs};
}

/**
 * Computes the time range of the hypothesis part at `index`.
 *
 * A part starts at its own alignment and ends where the next part starts; the
 * last part ends at the end of the whole event. Part alignments are added to
 * the event start, i.e. they are treated as relative to it.
 *
 * @param parts All hypothesis parts of the event.
 * @param index Index of the part to compute the range for.
 * @param eventTimeRange Time range of the whole event, or null.
 * @return The time range of the part, or null when it cannot be determined.
 */
function computePartTimeRange(parts, index, eventTimeRange) {
  const current = parts[index];
  if (eventTimeRange === null || current.alignment === null) {
    return null;
  }
  const relativeStartMs = toMs(current.alignment);
  const isLastPart = index === parts.length - 1;
  const relativeEndMs = isLastPart ?
    eventTimeRange.endMs - eventTimeRange.startMs :
    toMs(assertExists(parts[index + 1]).alignment);
  if (relativeEndMs === null) {
    return null;
  }
  return {
    startMs: relativeStartMs + eventTimeRange.startMs,
    endMs: relativeEndMs + eventTimeRange.startMs,
  };
}

/**
 * Flattens one recognition result into a list of text part tokens.
 *
 * @param ev Result carrying `hypothesisPart` and `timingEvent`.
 * @param offsetMs Offset in milliseconds applied to the event timing.
 * @param speakerLabelEnabled When false, every token gets a null speaker
 *     label.
 * @param isPartialResult Whether the produced tokens are marked `partial`.
 * @return The text part tokens; empty when the result has no hypothesis part.
 */
function flattenEvent(
  ev,
  offsetMs,
  speakerLabelEnabled,
  isPartialResult = false,
) {
  const {hypothesisPart: parts, timingEvent} = ev;
  const flattened = [];
  const eventTimeRange = parseTimingInfo(timingEvent, offsetMs);
  if (eventTimeRange === null) {
    // Logged only; tokens are still produced, with null time ranges.
    console.error("soda event has no timestamp", ev);
  }
  if (parts === null || parts.length === 0) {
    return [];
  }
  for (let index = 0; index < parts.length; index++) {
    const part = parts[index];
    const timeRange = computePartTimeRange(parts, index, eventTimeRange);
    flattened.push({
      kind: "textPart",
      // Only the first entry of `text` is used.
      text: assertExists(part.text[0]),
      timeRange,
      leadingSpace: part.leadingSpace,
      speakerLabel: speakerLabelEnabled ? part.speakerLabel : null,
      partial: isPartialResult ? true : undefined,
    });
  }
  return flattened;
}

/**
 * Searches `tokens` from the end for the text part that a speaker label
 * correction refers to.
 *
 * @param tokens Token list to search.
 * @param expectedStartMs Start time the token must have.
 * @param correctionPart Correction part whose first text entry must equal the
 *     token text.
 * @return The matching token, or null when there is none.
 */
function findCorrectedToken(tokens, expectedStartMs, correctionPart) {
  for (let idx = tokens.length - 1; idx >= 0; idx--) {
    const candidate = assertExists(tokens[idx]);
    if (candidate.kind === "textSeparator") {
      continue;
    }
    if (
      candidate.timeRange?.startMs === expectedStartMs &&
      candidate.text === correctionPart.text[0]
    ) {
      return candidate;
    }
  }
  return null;
}

/**
 * Accumulates recognition events into a list of text tokens.
 *
 * Final results are appended to `tokens`; the most recent partial result is
 * kept separately in `partialResultTokens` until it is replaced, superseded by
 * a final result, or finalized.
 */
export class SodaEventTransformer {
  /**
   * @param speakerLabelEnabled Whether speaker labels are kept on tokens.
   */
  constructor(speakerLabelEnabled) {
    this.speakerLabelEnabled = speakerLabelEnabled;
    this.tokens = [];
    this.partialResultTokens = null;
  }

  /**
   * Moves the pending partial result tokens (if any) into `tokens`, dropping
   * their `partial` flag, and clears the pending partial result.
   */
  finalizeTokens() {
    const pending = this.partialResultTokens;
    if (pending === null) {
      return;
    }
    // NOTE(review): a separator is added even if `pending` is empty — confirm
    // that this is intended.
    if (this.tokens.length > 0) {
      this.tokens.push(textSeparator);
    }
    for (const token of pending) {
      if (token.kind !== "textPart") {
        this.tokens.push(token);
        continue;
      }
      // Copy the token without its `partial` key.
      const {partial: _droppedPartialFlag, ...finalToken} = token;
      this.tokens.push(finalToken);
    }
    this.partialResultTokens = null;
  }

  /**
   * Builds a transcription from the final tokens followed by the pending
   * partial result tokens.
   *
   * The token array is copied, but the token objects themselves are shared
   * with this transformer.
   *
   * @param language Language passed through to the `Transcription`.
   * @return A new `Transcription`.
   */
  getTranscription(language) {
    const snapshot = [...this.tokens];
    const pending = this.partialResultTokens;
    if (pending !== null) {
      if (snapshot.length > 0) {
        snapshot.push(textSeparator);
      }
      snapshot.push(...pending);
    }
    return new Transcription(snapshot, language);
  }

  /**
   * Applies a speaker label correction to already finalized tokens.
   *
   * Each correction part is matched against the most recent text part with
   * the same start time (alignment plus `offsetMs`) and the same text. Tokens
   * whose speaker label is null are left untouched.
   *
   * @param ev Correction event carrying `hypothesisParts`.
   * @param offsetMs Offset in milliseconds added to the part alignment.
   */
  handleSpeakerLabelCorrectionEvent(ev, offsetMs) {
    for (const correctionPart of ev.hypothesisParts) {
      const speakerLabel = correctionPart.speakerLabel ?? null;
      const startMs = toMs(correctionPart.alignment);
      if (startMs === null) {
        console.error("speaker label correction event without timestamp", ev);
        continue;
      }
      const match = findCorrectedToken(
        this.tokens,
        startMs + offsetMs,
        correctionPart,
      );
      if (match === null) {
        console.error(
          "speaker label correction event without corresponding previous part?",
          ev,
        );
        continue;
      }
      if (match.speakerLabel !== null) {
        match.speakerLabel = speakerLabel;
      }
    }
  }

  /**
   * Consumes one event.
   *
   * - `partialResult` replaces the pending partial result tokens.
   * - `finalResult` discards the pending partial result and appends the
   *   flattened result to `tokens`, preceded by a separator when `tokens` is
   *   not empty.
   * - `labelCorrectionEvent` updates speaker labels of existing tokens.
   *
   * Any other event is logged as an error.
   *
   * @param event The event to consume.
   * @param offsetMs Offset in milliseconds applied to the event timing.
   */
  addEvent(event, offsetMs) {
    if ("partialResult" in event) {
      this.partialResultTokens = flattenEvent(
        event.partialResult,
        offsetMs,
        this.speakerLabelEnabled,
        true,
      );
      return;
    }
    if ("finalResult" in event) {
      this.partialResultTokens = null;
      // NOTE(review): the separator is pushed before knowing whether the
      // final result yields any token — confirm that this is intended.
      if (this.tokens.length > 0) {
        this.tokens.push(textSeparator);
      }
      const finalTokens = flattenEvent(
        event.finalResult,
        offsetMs,
        this.speakerLabelEnabled,
      );
      this.tokens.push(...finalTokens);
      return;
    }
    if ("labelCorrectionEvent" in event) {
      this.handleSpeakerLabelCorrectionEvent(
        event.labelCorrectionEvent,
        offsetMs,
      );
      return;
    }
    console.error("unknown event type", event);
  }
}

/**
 * Schema converting between the serialized `{textTokens, language}` form and
 * `Transcription` instances. Null `textTokens` decodes to null, and null
 * encodes to null `textTokens` with the `EN_US` language.
 */
export const transcriptionSchema = z.transform(
  z.object({
    textTokens: z.nullable(z.array(textTokenSchema)),
    language: z.withDefault(z.nativeEnum(LanguageCode), LanguageCode.EN_US),
  }),
  {
    test(input) {
      return input instanceof Transcription;
    },
    decode({textTokens, language}) {
      return textTokens === null ?
        null :
        new Transcription(textTokens, language);
    },
    encode(val) {
      if (val === null) {
        return {textTokens: null, language: LanguageCode.EN_US};
      }
      return {textTokens: val.textTokens, language: val.language};
    },
  },
);

/** Maximum length of the short description, including the trailing "...". */
const MAX_DESCRIPTION_LENGTH = 512;

/**
 * Joins tokens into plain text: separators become newlines, and a space is
 * put before a text part unless it starts a paragraph or has
 * `leadingSpace === false`.
 */
function renderPlainText(textTokens) {
  const pieces = [];
  let atParagraphStart = true;
  for (const token of textTokens) {
    if (token.kind === "textSeparator") {
      pieces.push("\n");
      atParagraphStart = true;
      continue;
    }
    if (!atParagraphStart && (token.leadingSpace ?? true)) {
      pieces.push(" ");
    }
    pieces.push(token.text);
    atParagraphStart = false;
  }
  return pieces.join("");
}

/**
 * Joins tokens into export text. Same as the plain text, except that every
 * change of speaker starts a new block that is headed by the localized
 * speaker label line when the new speaker label is not null.
 */
function renderExportText(textTokens) {
  const pieces = [];
  let atParagraphStart = true;
  let activeSpeaker = null;
  for (const token of textTokens) {
    if (token.kind === "textSeparator") {
      pieces.push("\n");
      atParagraphStart = true;
      continue;
    }
    const speaker = token.speakerLabel;
    if (speaker !== activeSpeaker) {
      // Close the running paragraph before switching speaker.
      if (!atParagraphStart) {
        pieces.push("\n");
        atParagraphStart = true;
      }
      // Blank line between blocks, but not at the very beginning.
      if (pieces.length !== 0) {
        pieces.push("\n");
      }
      if (speaker !== null) {
        pieces.push(i18n.transcriptionSpeakerLabelLabel(speaker));
        pieces.push("\n");
      }
      activeSpeaker = speaker;
    }
    if (!atParagraphStart && (token.leadingSpace ?? true)) {
      pieces.push(" ");
    }
    pieces.push(token.text);
    atParagraphStart = false;
  }
  return pieces.join("");
}

/**
 * Collects the distinct non-null speaker labels of non-partial text parts, in
 * order of first appearance.
 */
function collectSpeakerLabels(textTokens) {
  const seen = new Set();
  for (const token of textTokens) {
    if (token.kind !== "textPart") {
      continue;
    }
    if (token.speakerLabel === null || token.partial) {
      continue;
    }
    seen.add(token.speakerLabel);
  }
  return [...seen];
}

/**
 * Predicate handed to `sliceWhen` for two adjacent tokens; returns true where
 * a paragraph boundary is wanted.
 */
function isParagraphBoundary(prev, next) {
  if (prev.kind === "textSeparator" || next.kind === "textSeparator") {
    return true;
  }
  if (prev.timeRange === null && next.timeRange === null) {
    return false;
  }
  // The next token does not start exactly where the previous one ends.
  if (prev.timeRange?.endMs !== next.timeRange?.startMs) {
    return true;
  }
  if (prev.partial !== next.partial) {
    return true;
  }
  return !prev.partial && prev.speakerLabel !== next.speakerLabel;
}

/**
 * A list of text tokens together with its language.
 *
 * The derived views below are wrapped in `lazyInit` (presumably computed on
 * first call and cached — verify against `lazyInit` in utils).
 */
export class Transcription {
  /**
   * @param textTokens Text part and separator tokens.
   * @param language Language of the transcription.
   */
  constructor(textTokens, language) {
    this.textTokens = textTokens;
    this.language = language;

    /** Word count of the plain text for the transcription language. */
    this.getWordCount = lazyInit(
      () => getWordCount(this.toPlainText(), this.language),
    );

    /** Plain text of the transcription. */
    this.toPlainText = lazyInit(() => renderPlainText(this.textTokens));

    /** Text for export, with speaker label headings. */
    this.toExportText = lazyInit(() => renderExportText(this.textTokens));

    /**
     * Plain text truncated to at most `MAX_DESCRIPTION_LENGTH` characters,
     * ending with "..." when truncated.
     */
    this.toShortDescription = lazyInit(() => {
      // NOTE(review): only this view guards against null `textTokens`.
      if (this.textTokens === null) {
        return "";
      }
      const plainText = this.toPlainText();
      const keptLength = MAX_DESCRIPTION_LENGTH - 3;
      if (plainText.length <= keptLength) {
        return plainText;
      }
      return plainText.substring(0, keptLength) + "...";
    });

    /** Distinct speaker labels of the non-partial text parts. */
    this.getSpeakerLabels = lazyInit(
      () => collectSpeakerLabels(this.textTokens),
    );

    /**
     * Token groups produced by `sliceWhen`, keeping only the groups made
     * solely of text parts.
     */
    this.getParagraphs = lazyInit(() => {
      const groups = sliceWhen(this.textTokens, isParagraphBoundary);
      return groups.filter(
        (group) => group.every((token) => token.kind === "textPart"),
      );
    });
  }

  /** Returns whether the transcription has no token at all. */
  isEmpty() {
    return this.textTokens.length === 0;
  }
}