我找到了一个解决方案,
问题是,当我们给from_ndarray函数一个numpy数组时,它的格式应该是这样的:[
[ 1] [ 1] [0] .... [ 1] [ 0]
]
所以现在我用pyaudio来监听麦克风,但得到的numpy数组形状不正确,所以我用data.reshape(-1, 1)把它重塑成正确的形状
这是最终的自定义类:
class CustomAudioTrack(MediaStreamTrack):
    """Audio track that captures 16-bit PCM from a PyAudio input stream.

    Every call to :meth:`recv` reads one buffer from the input stream and
    wraps it in a packed ``s16`` ``av.AudioFrame`` stamped with a running
    sample counter.

    Args:
        rate: Capture sample rate in Hz; also used for the frame time base.
        channels: Number of input channels (1 for mono, 2 for stereo).
        frames_per_buffer: Samples per channel read for each frame.

    Raises:
        ValueError: If ``channels`` is not one of the supported counts.
    """

    kind = "audio"

    # Channel count -> layout name handed to ``av.AudioFrame.from_ndarray``.
    _LAYOUTS = {1: "mono", 2: "stereo"}

    def __init__(self, rate=48000, channels=2, frames_per_buffer=960):
        super().__init__()
        if channels not in self._LAYOUTS:
            raise ValueError(
                f"unsupported channel count {channels!r}; "
                f"expected one of {sorted(self._LAYOUTS)}"
            )
        self.rate = rate
        self.channels = channels
        self.frames_per_buffer = frames_per_buffer
        self._layout = self._LAYOUTS[channels]
        self._timestamp = 0
        self.pa = pyaudio.PyAudio()
        # Open the device with the requested parameters so the captured data
        # really matches the rate/layout advertised on the frames.
        self.stream = self.pa.open(
            format=pyaudio.paInt16,
            channels=channels,
            rate=rate,
            input=True,
            frames_per_buffer=frames_per_buffer,
        )

    async def recv(self):
        """Read one buffer from the input stream and return it as a frame.

        Returns:
            An ``av.AudioFrame`` in packed ``s16`` format whose ``pts`` is the
            running sample count (advanced before stamping) expressed in a
            ``1 / rate`` time base.
        """
        # Imported locally so the class does not rely on a module-level import.
        import asyncio

        # ``stream.read`` blocks until the buffer is filled; run it in the
        # default executor so the event loop stays responsive meanwhile.
        loop = asyncio.get_running_loop()
        raw = await loop.run_in_executor(
            None, self.stream.read, self.frames_per_buffer
        )

        # Packed formats take a single row holding the interleaved samples.
        samples = np.frombuffer(raw, dtype=np.int16).reshape(1, -1)

        self._timestamp += self.frames_per_buffer

        audio_frame = av.AudioFrame.from_ndarray(
            samples, format="s16", layout=self._layout
        )
        audio_frame.sample_rate = self.rate
        audio_frame.pts = self._timestamp
        audio_frame.time_base = Fraction(1, self.rate)
        return audio_frame

    def __del__(self):
        """Stop and close the input stream, then terminate PyAudio."""
        # ``__init__`` may have failed before these attributes were assigned.
        stream = getattr(self, "stream", None)
        pa = getattr(self, "pa", None)
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            # Terminate PyAudio even when stopping the stream raised.
            if pa is not None:
                pa.terminate()