代码之家  ›  专栏  ›  技术社区  ›  DennisVA

自定义TextToSpeechService中的突出显示错误

  •  0
  • DennisVA  · 技术社区  · 5 年前

    高亮显示的单词比实际播放的语音大约提前了 700 毫秒。我怀疑 audioPositionMillis 的值有误,但据我所知它的计算是正确的。我可能忽略了某个小细节。

       @Override
        /**
         * Synthesizes the requested text through the remote TTS service, streams the decoded
         * audio to the framework in chunks, and (on API 26+) reports which word range is being
         * spoken so that clients can highlight it.
         *
         * @param request  the synthesis request (text, language, speech rate)
         * @param callback the framework callback that receives audio, range markers and the
         *                 final done/error signal; it is only used on the calling thread
         */
        protected synchronized void onSynthesizeText(SynthesisRequest request, SynthesisCallback callback) {

            // Note that we call onLoadLanguage here since there is no guarantee
            // that there was a prior call to this function.
            int load = onLoadLanguage(request.getLanguage(), request.getCountry(), request.getVariant());

            // We might get requests for a language we don't support - in which case
            // we error out early before wasting too much time.
            if (load == TextToSpeech.LANG_NOT_SUPPORTED) {
                callback.error();
                return;
            }

            String ttsText = request.getCharSequenceText().toString();
            final int speechRate = mapSpeechRate(request.getSpeechRate());
            TtsParams ttsParams = new TtsParams(ttsText, currentVoice, speechRate, VOLUME,
                    TIME_BETWEEN_SENTENCES_MILLIS, BIT_RATE, TtsParams.Format.WAV);

            try {
                // Synchronous call because methods executed on the synthesisCallback need to be
                // called on the synth thread.
                Response<TtsInfo> response = serviceManager.getTtsInfo(ttsParams);
                TtsInfo data = (response != null) ? response.body() : null;
                if (data == null) {
                    callback.error();
                    return;
                }

                // The service reports positions shifted by one word, so each list is realigned:
                // a leading entry for the first word is inserted and the trailing entry dropped.
                List<Integer> wordPositionsMs = data.getAudioPos();
                List<Integer> wordStartPositions = data.getCharPos();
                List<Integer> wordLengths = data.getCharCount();

                wordStartPositions.add(0, 0);
                wordStartPositions.remove(wordStartPositions.size() - 1);

                wordPositionsMs.add(0, 102); //First word always starts at 102ms according to the docs
                // Drop the LAST timestamp of this list. Indexing with the other list's size
                // removed the second-to-last entry (and threw on an empty response).
                wordPositionsMs.remove(wordPositionsMs.size() - 1);

                callback.start(SAMPLING_RATE_HZ, AudioFormat.ENCODING_PCM_16BIT, CHANNEL_COUNT);
                int maxBufferSize = callback.getMaxBufferSize();
                byte[] audioBuffer = Base64.decode(data.getByteArray(), Base64.DEFAULT);

                // Loop-invariant conversion factors: one frame holds one sample per channel.
                final double bytesPerFrame = CHANNEL_COUNT * (BIT_DEPTH / 8D);
                final double bytesPerMilli = (SAMPLING_RATE_HZ / 1000D) * bytesPerFrame;

                int lastReportedWord = -1;
                int offset = 0;
                while (offset < audioBuffer.length) {
                    int bytesToWrite = Math.min(maxBufferSize, audioBuffer.length - offset);
                    if (callback.audioAvailable(audioBuffer, offset, bytesToWrite) != TextToSpeech.SUCCESS) {
                        callback.error();
                        return;
                    }

                    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
                        long audioPositionMillis = Math.round(offset / bytesPerMilli);

                        // Index of the last word whose start time lies before this chunk.
                        int wordIndex = -1;
                        for (int i = 0; i < wordPositionsMs.size(); i++) {
                            if (audioPositionMillis > wordPositionsMs.get(i)) {
                                wordIndex++;
                            } else {
                                break;
                            }
                        }

                        // Report each word once, and only if every list has an entry for it.
                        boolean inBounds = wordIndex > -1
                                && wordIndex < wordStartPositions.size()
                                && wordIndex < wordLengths.size();
                        if (inBounds && wordIndex != lastReportedWord) {
                            int wordStart = wordStartPositions.get(wordIndex);
                            int wordLength = wordLengths.get(wordIndex);
                            // The marker must be the audio position in frames. Passing -1 made
                            // the highlight run ahead of the speech that was actually playing.
                            int markerInFrames = (int) (offset / bytesPerFrame);
                            callback.rangeStart(markerInFrames, wordStart, wordStart + wordLength);
                            lastReportedWord = wordIndex;
                        }
                    }

                    offset += bytesToWrite;
                }
                callback.done();
            } catch (IOException | NoNetworkException e) {
                e.printStackTrace();
                callback.error();
            }
        }
    
    0 回复  |  直到 5 年前
        1
  •  0
  •   DennisVA    5 年前

    问题的原因是:我把 -1 作为 markerInFrames 参数传给了 rangeStart 回调方法。应当传入该范围在音频数据中的位置(以帧为单位),修改如下:

    // Pass the current audio position (byte offset divided by bytes per sample) as the
    // marker instead of -1, so the range is reported at the point where it is spoken.
    // NOTE(review): this presumably assumes mono audio; a frame holds one sample per
    // channel, so multi-channel output would also need dividing by CHANNEL_COUNT - confirm.
    callback.rangeStart((int)(offset/(BIT_DEPTH/8D)), wordStart, wordStart + wordLength);