diff --git a/web/src/lib/wavtools/index.js b/web/src/lib/wavtools/index.js
new file mode 100644
index 00000000..71238942
--- /dev/null
+++ b/web/src/lib/wavtools/index.js
@@ -0,0 +1,6 @@
+import { WavPacker } from './lib/wav_packer.js';
+import { AudioAnalysis } from './lib/analysis/audio_analysis.js';
+import { WavStreamPlayer } from './lib/wav_stream_player.js';
+import { WavRecorder } from './lib/wav_recorder.js';
+
+export { AudioAnalysis, WavPacker, WavStreamPlayer, WavRecorder };
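+
+// A minimal usage sketch (not executed here); `client`, `serverAudioChunk`,
+// and the 24kHz sample rate are illustrative assumptions, not part of this PR:
+//
+//   const recorder = new WavRecorder({ sampleRate: 24000 });
+//   const player = new WavStreamPlayer({ sampleRate: 24000 });
+//   await player.connect();
+//   await recorder.begin();
+//   await recorder.record((data) => client.appendInputAudio(data.mono));
+//   // ...later, queue raw PCM16 chunks received from the server for playback:
+//   player.add16BitPCM(serverAudioChunk, 'response-1');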
diff --git a/web/src/lib/wavtools/lib/analysis/audio_analysis.js b/web/src/lib/wavtools/lib/analysis/audio_analysis.js
new file mode 100644
index 00000000..4af34d54
--- /dev/null
+++ b/web/src/lib/wavtools/lib/analysis/audio_analysis.js
@@ -0,0 +1,203 @@
+import {
+ noteFrequencies,
+ noteFrequencyLabels,
+ voiceFrequencies,
+ voiceFrequencyLabels,
+} from './constants.js';
+
+/**
+ * Output of AudioAnalysis for the frequency domain of the audio
+ * @typedef {Object} AudioAnalysisOutputType
+ * @property {Float32Array} values Amplitude of each frequency, normalized to the range [0, 1]
+ * @property {number[]} frequencies Raw frequency bucket values
+ * @property {string[]} labels Labels for the frequency bucket values
+ */
+
+/**
+ * Analyzes audio for visual output
+ * @class
+ */
+export class AudioAnalysis {
+ /**
+ * Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range
+ * and returns human-readable frequency values and labels
+ * @param {AnalyserNode} analyser
+ * @param {number} sampleRate
+ * @param {Float32Array} [fftResult]
+ * @param {"frequency"|"music"|"voice"} [analysisType]
+ * @param {number} [minDecibels] default -100
+ * @param {number} [maxDecibels] default -30
+ * @returns {AudioAnalysisOutputType}
+ */
+ static getFrequencies(
+ analyser,
+ sampleRate,
+ fftResult,
+ analysisType = 'frequency',
+ minDecibels = -100,
+ maxDecibels = -30,
+ ) {
+ if (!fftResult) {
+ fftResult = new Float32Array(analyser.frequencyBinCount);
+ analyser.getFloatFrequencyData(fftResult);
+ }
+ const nyquistFrequency = sampleRate / 2;
+ const frequencyStep = (1 / fftResult.length) * nyquistFrequency;
+ let outputValues;
+ let frequencies;
+ let labels;
+ if (analysisType === 'music' || analysisType === 'voice') {
+ const useFrequencies =
+ analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
+ const aggregateOutput = Array(useFrequencies.length).fill(minDecibels);
+ for (let i = 0; i < fftResult.length; i++) {
+ const frequency = i * frequencyStep;
+ const amplitude = fftResult[i];
+ for (let n = useFrequencies.length - 1; n >= 0; n--) {
+ if (frequency > useFrequencies[n]) {
+ aggregateOutput[n] = Math.max(aggregateOutput[n], amplitude);
+ break;
+ }
+ }
+ }
+ outputValues = aggregateOutput;
+ frequencies =
+ analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
+ labels =
+ analysisType === 'voice' ? voiceFrequencyLabels : noteFrequencyLabels;
+ } else {
+ outputValues = Array.from(fftResult);
+ frequencies = outputValues.map((_, i) => frequencyStep * i);
+ labels = frequencies.map((f) => `${f.toFixed(2)} Hz`);
+ }
+ // Normalize amplitudes to the range [0, 1]
+ const normalizedOutput = outputValues.map((v) => {
+ return Math.max(
+ 0,
+ Math.min((v - minDecibels) / (maxDecibels - minDecibels), 1),
+ );
+ });
+ const values = new Float32Array(normalizedOutput);
+ return {
+ values,
+ frequencies,
+ labels,
+ };
+ }
+
+ /**
+ * Creates a new AudioAnalysis instance for an HTMLAudioElement
+ * @param {HTMLAudioElement} audioElement
+ * @param {AudioBuffer|null} [audioBuffer] If provided, will cache all frequency domain data from the buffer
+ * @returns {AudioAnalysis}
+ */
+ constructor(audioElement, audioBuffer = null) {
+ this.fftResults = [];
+ if (audioBuffer) {
+ /**
+ * Modified from
+ * https://stackoverflow.com/questions/75063715/using-the-web-audio-api-to-analyze-a-song-without-playing
+ *
+ * We do this to populate FFT values for the audio if an `audioBuffer` is provided,
+ * because Safari fails when using `createMediaElementSource`.
+ * This has a non-zero RAM cost, so we only opt in to it on Safari; Chrome is fine without it.
+ */
+ const { length, sampleRate } = audioBuffer;
+ const offlineAudioContext = new OfflineAudioContext({
+ length,
+ sampleRate,
+ });
+ const source = offlineAudioContext.createBufferSource();
+ source.buffer = audioBuffer;
+ const analyser = offlineAudioContext.createAnalyser();
+ analyser.fftSize = 8192;
+ analyser.smoothingTimeConstant = 0.1;
+ source.connect(analyser);
+ // The smallest render quantum is 128 / sampleRate seconds,
+ // but 60fps is enough for visualization - cuts ~1s of audio from ~6MB to ~1MB of RAM
+ const renderQuantumInSeconds = 1 / 60;
+ const durationInSeconds = length / sampleRate;
+ const analyze = (index) => {
+ const suspendTime = renderQuantumInSeconds * index;
+ if (suspendTime < durationInSeconds) {
+ offlineAudioContext.suspend(suspendTime).then(() => {
+ const fftResult = new Float32Array(analyser.frequencyBinCount);
+ analyser.getFloatFrequencyData(fftResult);
+ this.fftResults.push(fftResult);
+ analyze(index + 1);
+ });
+ }
+ if (index === 1) {
+ offlineAudioContext.startRendering();
+ } else {
+ offlineAudioContext.resume();
+ }
+ };
+ source.start(0);
+ analyze(1);
+ this.audio = audioElement;
+ this.context = offlineAudioContext;
+ this.analyser = analyser;
+ this.sampleRate = sampleRate;
+ this.audioBuffer = audioBuffer;
+ } else {
+ const audioContext = new AudioContext();
+ const track = audioContext.createMediaElementSource(audioElement);
+ const analyser = audioContext.createAnalyser();
+ analyser.fftSize = 8192;
+ analyser.smoothingTimeConstant = 0.1;
+ track.connect(analyser);
+ analyser.connect(audioContext.destination);
+ this.audio = audioElement;
+ this.context = audioContext;
+ this.analyser = analyser;
+ this.sampleRate = this.context.sampleRate;
+ this.audioBuffer = null;
+ }
+ }
+
+ /**
+ * Gets the current frequency domain data from the playing audio track
+ * @param {"frequency"|"music"|"voice"} [analysisType]
+ * @param {number} [minDecibels] default -100
+ * @param {number} [maxDecibels] default -30
+ * @returns {AudioAnalysisOutputType}
+ */
+ getFrequencies(
+ analysisType = 'frequency',
+ minDecibels = -100,
+ maxDecibels = -30,
+ ) {
+ let fftResult = null;
+ if (this.audioBuffer && this.fftResults.length) {
+ const pct = this.audio.currentTime / this.audio.duration;
+ const index = Math.min(
+ (pct * this.fftResults.length) | 0,
+ this.fftResults.length - 1,
+ );
+ fftResult = this.fftResults[index];
+ }
+ return AudioAnalysis.getFrequencies(
+ this.analyser,
+ this.sampleRate,
+ fftResult,
+ analysisType,
+ minDecibels,
+ maxDecibels,
+ );
+ }
+
+ /**
+ * Resume the internal AudioContext if it was suspended due to the lack of
+ * user interaction when the AudioAnalysis was instantiated.
+ * @returns {Promise<true>}
+ */
+ async resumeIfSuspended() {
+ if (this.context.state === 'suspended') {
+ await this.context.resume();
+ }
+ return true;
+ }
+}
+
+globalThis.AudioAnalysis = AudioAnalysis;
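+
+// A minimal usage sketch, assuming an HTMLAudioElement `audioEl` that is
+// already playing; the render loop below is illustrative only:
+//
+//   const analysis = new AudioAnalysis(audioEl);
+//   const render = () => {
+//     const { values, labels } = analysis.getFrequencies('voice');
+//     // values is a Float32Array normalized to [0, 1], one entry per label
+//     requestAnimationFrame(render);
+//   };
+//   render();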
diff --git a/web/src/lib/wavtools/lib/analysis/constants.js b/web/src/lib/wavtools/lib/analysis/constants.js
new file mode 100644
index 00000000..f14da38e
--- /dev/null
+++ b/web/src/lib/wavtools/lib/analysis/constants.js
@@ -0,0 +1,60 @@
+/**
+ * Constants for help with visualization
+ * Helps map frequency ranges from Fast Fourier Transform
+ * to human-interpretable ranges, notably music ranges and
+ * human vocal ranges.
+ */
+
+// Eighth octave frequencies
+const octave8Frequencies = [
+ 4186.01, 4434.92, 4698.63, 4978.03, 5274.04, 5587.65, 5919.91, 6271.93,
+ 6644.88, 7040.0, 7458.62, 7902.13,
+];
+
+// Labels for each of the above frequencies
+const octave8FrequencyLabels = [
+ 'C',
+ 'C#',
+ 'D',
+ 'D#',
+ 'E',
+ 'F',
+ 'F#',
+ 'G',
+ 'G#',
+ 'A',
+ 'A#',
+ 'B',
+];
+
+/**
+ * All note frequencies from 1st to 8th octave
+ * in format "A#8" (A#, 8th octave)
+ */
+export const noteFrequencies = [];
+export const noteFrequencyLabels = [];
+for (let i = 1; i <= 8; i++) {
+ for (let f = 0; f < octave8Frequencies.length; f++) {
+ const freq = octave8Frequencies[f];
+ noteFrequencies.push(freq / Math.pow(2, 8 - i));
+ noteFrequencyLabels.push(octave8FrequencyLabels[f] + i);
+ }
+}
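+// e.g. the first generated entry is "C1" at 4186.01 / 2^7 ≈ 32.70 Hz and the
+// last is "B8" at 7902.13 Hz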
+
+/**
+ * Subset of the note frequencies between 32 and 2000 Hz
+ * 6 octave range: C1 to B6
+ */
+const voiceFrequencyRange = [32.0, 2000.0];
+export const voiceFrequencies = noteFrequencies.filter((_, i) => {
+ return (
+ noteFrequencies[i] > voiceFrequencyRange[0] &&
+ noteFrequencies[i] < voiceFrequencyRange[1]
+ );
+});
+export const voiceFrequencyLabels = noteFrequencyLabels.filter((_, i) => {
+ return (
+ noteFrequencies[i] > voiceFrequencyRange[0] &&
+ noteFrequencies[i] < voiceFrequencyRange[1]
+ );
+});
diff --git a/web/src/lib/wavtools/lib/wav_packer.js b/web/src/lib/wavtools/lib/wav_packer.js
new file mode 100644
index 00000000..7146b7fd
--- /dev/null
+++ b/web/src/lib/wavtools/lib/wav_packer.js
@@ -0,0 +1,113 @@
+/**
+ * Raw wav audio file contents
+ * @typedef {Object} WavPackerAudioType
+ * @property {Blob} blob
+ * @property {string} url
+ * @property {number} channelCount
+ * @property {number} sampleRate
+ * @property {number} duration
+ */
+
+/**
+ * Utility class for assembling PCM16 "audio/wav" data
+ * @class
+ */
+export class WavPacker {
+ /**
+ * Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format
+ * @param {Float32Array} float32Array
+ * @returns {ArrayBuffer}
+ */
+ static floatTo16BitPCM(float32Array) {
+ const buffer = new ArrayBuffer(float32Array.length * 2);
+ const view = new DataView(buffer);
+ let offset = 0;
+ for (let i = 0; i < float32Array.length; i++, offset += 2) {
+ let s = Math.max(-1, Math.min(1, float32Array[i]));
+ view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
+ }
+ return buffer;
+ }
+
+ /**
+ * Concatenates two ArrayBuffers
+ * @param {ArrayBuffer} leftBuffer
+ * @param {ArrayBuffer} rightBuffer
+ * @returns {ArrayBuffer}
+ */
+ static mergeBuffers(leftBuffer, rightBuffer) {
+ const tmpArray = new Uint8Array(
+ leftBuffer.byteLength + rightBuffer.byteLength
+ );
+ tmpArray.set(new Uint8Array(leftBuffer), 0);
+ tmpArray.set(new Uint8Array(rightBuffer), leftBuffer.byteLength);
+ return tmpArray.buffer;
+ }
+
+ /**
+ * Packs data into an Int16 format
+ * @private
+ * @param {number} size 0 = 1x Int16, 1 = 2x Int16
+ * @param {number} arg value to pack
+ * @returns {Uint8Array}
+ */
+ _packData(size, arg) {
+ return [
+ new Uint8Array([arg, arg >> 8]),
+ new Uint8Array([arg, arg >> 8, arg >> 16, arg >> 24]),
+ ][size];
+ }
+
+ /**
+ * Packs audio into "audio/wav" Blob
+ * @param {number} sampleRate
+ * @param {{bitsPerSample: number, channels: Array, data: Int16Array}} audio
+ * @returns {WavPackerAudioType}
+ */
+ pack(sampleRate, audio) {
+ if (!audio?.bitsPerSample) {
+ throw new Error(`Missing "bitsPerSample"`);
+ } else if (!audio?.channels) {
+ throw new Error(`Missing "channels"`);
+ } else if (!audio?.data) {
+ throw new Error(`Missing "data"`);
+ }
+ const { bitsPerSample, channels, data } = audio;
+ const output = [
+ // Header
+ 'RIFF',
+ this._packData(
+ 1,
+ 4 + (8 + 24) /* chunk 1 length */ + (8 + 8) /* chunk 2 length */
+ ), // Length
+ 'WAVE',
+ // chunk 1
+ 'fmt ', // Sub-chunk identifier
+ this._packData(1, 16), // Chunk length
+ this._packData(0, 1), // Audio format (1 is linear quantization)
+ this._packData(0, channels.length),
+ this._packData(1, sampleRate),
+ this._packData(1, (sampleRate * channels.length * bitsPerSample) / 8), // Byte rate
+ this._packData(0, (channels.length * bitsPerSample) / 8),
+ this._packData(0, bitsPerSample),
+ // chunk 2
+ 'data', // Sub-chunk identifier
+ this._packData(
+ 1,
+ (channels[0].length * channels.length * bitsPerSample) / 8
+ ), // Chunk length
+ data,
+ ];
+ const blob = new Blob(output, { type: 'audio/wav' });
+ const url = URL.createObjectURL(blob);
+ return {
+ blob,
+ url,
+ channelCount: channels.length,
+ sampleRate,
+ duration: data.byteLength / (channels.length * sampleRate * 2),
+ };
+ }
+}
+
+globalThis.WavPacker = WavPacker;
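+
+// A minimal usage sketch; `float32Samples` (a mono Float32Array of audio) and
+// the 24kHz sample rate are illustrative assumptions:
+//
+//   const data = WavPacker.floatTo16BitPCM(float32Samples);
+//   const wav = new WavPacker().pack(24000, {
+//     bitsPerSample: 16,
+//     channels: [float32Samples],
+//     data,
+//   });
+//   new Audio(wav.url).play();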
diff --git a/web/src/lib/wavtools/lib/wav_recorder.js b/web/src/lib/wavtools/lib/wav_recorder.js
new file mode 100644
index 00000000..a4f1d045
--- /dev/null
+++ b/web/src/lib/wavtools/lib/wav_recorder.js
@@ -0,0 +1,548 @@
+import { AudioProcessorSrc } from './worklets/audio_processor.js';
+import { AudioAnalysis } from './analysis/audio_analysis.js';
+import { WavPacker } from './wav_packer.js';
+
+/**
+ * Decodes audio into a wav file
+ * @typedef {Object} DecodedAudioType
+ * @property {Blob} blob
+ * @property {string} url
+ * @property {Float32Array} values
+ * @property {AudioBuffer} audioBuffer
+ */
+
+/**
+ * Records live stream of user audio as PCM16 "audio/wav" data
+ * @class
+ */
+export class WavRecorder {
+ /**
+ * Create a new WavRecorder instance
+ * @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options]
+ * @returns {WavRecorder}
+ */
+ constructor({
+ sampleRate = 44100,
+ outputToSpeakers = false,
+ debug = false,
+ } = {}) {
+ // Script source
+ this.scriptSrc = AudioProcessorSrc;
+ // Config
+ this.sampleRate = sampleRate;
+ this.outputToSpeakers = outputToSpeakers;
+ this.debug = !!debug;
+ this._deviceChangeCallback = null;
+ this._devices = [];
+ // State variables
+ this.stream = null;
+ this.processor = null;
+ this.source = null;
+ this.node = null;
+ this.recording = false;
+ // Event handling with AudioWorklet
+ this._lastEventId = 0;
+ this.eventReceipts = {};
+ this.eventTimeout = 5000;
+ // Process chunks of audio
+ this._chunkProcessor = () => {};
+ this._chunkProcessorSize = void 0;
+ this._chunkProcessorBuffer = {
+ raw: new ArrayBuffer(0),
+ mono: new ArrayBuffer(0),
+ };
+ }
+
+ /**
+ * Decodes audio data from multiple formats to a Blob, url, Float32Array and AudioBuffer
+ * @param {Blob|Float32Array|Int16Array|ArrayBuffer|number[]} audioData
+ * @param {number} sampleRate
+ * @param {number} fromSampleRate
+ * @returns {Promise<DecodedAudioType>}
+ */
+ static async decode(audioData, sampleRate = 44100, fromSampleRate = -1) {
+ const context = new AudioContext({ sampleRate });
+ let arrayBuffer;
+ let blob;
+ if (audioData instanceof Blob) {
+ if (fromSampleRate !== -1) {
+ throw new Error(
+ `Can not specify "fromSampleRate" when reading from Blob`,
+ );
+ }
+ blob = audioData;
+ arrayBuffer = await blob.arrayBuffer();
+ } else if (audioData instanceof ArrayBuffer) {
+ if (fromSampleRate !== -1) {
+ throw new Error(
+ `Can not specify "fromSampleRate" when reading from ArrayBuffer`,
+ );
+ }
+ arrayBuffer = audioData;
+ blob = new Blob([arrayBuffer], { type: 'audio/wav' });
+ } else {
+ let float32Array;
+ let data;
+ if (audioData instanceof Int16Array) {
+ data = audioData;
+ float32Array = new Float32Array(audioData.length);
+ for (let i = 0; i < audioData.length; i++) {
+ float32Array[i] = audioData[i] / 0x8000;
+ }
+ } else if (audioData instanceof Float32Array) {
+ float32Array = audioData;
+ } else if (audioData instanceof Array) {
+ float32Array = new Float32Array(audioData);
+ } else {
+ throw new Error(
+ `"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array`,
+ );
+ }
+ if (fromSampleRate === -1) {
+ throw new Error(
+ `Must specify "fromSampleRate" when reading from Float32Array, In16Array or Array`,
+ );
+ } else if (fromSampleRate < 3000) {
+ throw new Error(`Minimum "fromSampleRate" is 3000 (3kHz)`);
+ }
+ if (!data) {
+ data = WavPacker.floatTo16BitPCM(float32Array);
+ }
+ const audio = {
+ bitsPerSample: 16,
+ channels: [float32Array],
+ data,
+ };
+ const packer = new WavPacker();
+ const result = packer.pack(fromSampleRate, audio);
+ blob = result.blob;
+ arrayBuffer = await blob.arrayBuffer();
+ }
+ const audioBuffer = await context.decodeAudioData(arrayBuffer);
+ const values = audioBuffer.getChannelData(0);
+ const url = URL.createObjectURL(blob);
+ return {
+ blob,
+ url,
+ values,
+ audioBuffer,
+ };
+ }
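+
+ // Usage sketch for decode(); `pcm16Chunk` (an Int16Array captured at 24kHz)
+ // and `audioElement` are illustrative assumptions:
+ //
+ //   const wav = await WavRecorder.decode(pcm16Chunk, 44100, 24000);
+ //   audioElement.src = wav.url;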
+
+ /**
+ * Logs data in debug mode
+ * @param {...any} arguments
+ * @returns {true}
+ */
+ log() {
+ if (this.debug) {
+ console.log(...arguments);
+ }
+ return true;
+ }
+
+ /**
+ * Retrieves the current sampleRate for the recorder
+ * @returns {number}
+ */
+ getSampleRate() {
+ return this.sampleRate;
+ }
+
+ /**
+ * Retrieves the current status of the recording
+ * @returns {"ended"|"paused"|"recording"}
+ */
+ getStatus() {
+ if (!this.processor) {
+ return 'ended';
+ } else if (!this.recording) {
+ return 'paused';
+ } else {
+ return 'recording';
+ }
+ }
+
+ /**
+ * Sends an event to the AudioWorklet
+ * @private
+ * @param {string} name
+ * @param {{[key: string]: any}} data
+ * @param {AudioWorkletNode} [_processor]
+ * @returns {Promise<{[key: string]: any}>}
+ */
+ async _event(name, data = {}, _processor = null) {
+ _processor = _processor || this.processor;
+ if (!_processor) {
+ throw new Error('Can not send events without recording first');
+ }
+ const message = {
+ event: name,
+ id: this._lastEventId++,
+ data,
+ };
+ _processor.port.postMessage(message);
+ const t0 = new Date().valueOf();
+ while (!this.eventReceipts[message.id]) {
+ if (new Date().valueOf() - t0 > this.eventTimeout) {
+ throw new Error(`Timeout waiting for "${name}" event`);
+ }
+ await new Promise((res) => setTimeout(() => res(true), 1));
+ }
+ const payload = this.eventReceipts[message.id];
+ delete this.eventReceipts[message.id];
+ return payload;
+ }
+
+ /**
+ * Sets the device change callback; removes the listener if the callback provided is `null`
+ * @param {((devices: Array<MediaDeviceInfo & {default?: boolean}>) => void)|null} callback
+ * @returns {true}
+ */
+ listenForDeviceChange(callback) {
+ if (callback === null && this._deviceChangeCallback) {
+ navigator.mediaDevices.removeEventListener(
+ 'devicechange',
+ this._deviceChangeCallback,
+ );
+ this._deviceChangeCallback = null;
+ } else if (callback !== null) {
+ // Basically a debounce; we only want this called once when devices change
+ // And we only want the most recent callback() to be executed
+ // if a few are operating at the same time
+ let lastId = 0;
+ let lastDevices = [];
+ const serializeDevices = (devices) =>
+ devices
+ .map((d) => d.deviceId)
+ .sort()
+ .join(',');
+ const cb = async () => {
+ let id = ++lastId;
+ const devices = await this.listDevices();
+ if (id === lastId) {
+ if (serializeDevices(lastDevices) !== serializeDevices(devices)) {
+ lastDevices = devices;
+ callback(devices.slice());
+ }
+ }
+ };
+ navigator.mediaDevices.addEventListener('devicechange', cb);
+ cb();
+ this._deviceChangeCallback = cb;
+ }
+ return true;
+ }
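+
+ // Usage sketch; `recorder` and `updateDeviceMenu` are illustrative only:
+ //
+ //   recorder.listenForDeviceChange((devices) => updateDeviceMenu(devices));
+ //   recorder.listenForDeviceChange(null); // stop listening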
+
+ /**
+ * Manually request permission to use the microphone
+ * @returns {Promise<true>}
+ */
+ async requestPermission() {
+ const permissionStatus = await navigator.permissions.query({
+ name: 'microphone',
+ });
+ if (permissionStatus.state === 'denied') {
+ window.alert('You must grant microphone access to use this feature.');
+ } else if (permissionStatus.state === 'prompt') {
+ try {
+ const stream = await navigator.mediaDevices.getUserMedia({
+ audio: true,
+ });
+ const tracks = stream.getTracks();
+ tracks.forEach((track) => track.stop());
+ } catch (e) {
+ window.alert('You must grant microphone access to use this feature.');
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Lists all eligible devices for recording; will request permission to use the microphone
+ * @returns {Promise<Array<MediaDeviceInfo & {default?: boolean}>>}
+ */
+ async listDevices() {
+ if (
+ !navigator.mediaDevices ||
+ !('enumerateDevices' in navigator.mediaDevices)
+ ) {
+ throw new Error('Could not request user devices');
+ }
+ await this.requestPermission();
+ const devices = await navigator.mediaDevices.enumerateDevices();
+ const audioDevices = devices.filter(
+ (device) => device.kind === 'audioinput',
+ );
+ const defaultDeviceIndex = audioDevices.findIndex(
+ (device) => device.deviceId === 'default',
+ );
+ const deviceList = [];
+ if (defaultDeviceIndex !== -1) {
+ let defaultDevice = audioDevices.splice(defaultDeviceIndex, 1)[0];
+ let existingIndex = audioDevices.findIndex(
+ (device) => device.groupId === defaultDevice.groupId,
+ );
+ if (existingIndex !== -1) {
+ defaultDevice = audioDevices.splice(existingIndex, 1)[0];
+ }
+ defaultDevice.default = true;
+ deviceList.push(defaultDevice);
+ }
+ return deviceList.concat(audioDevices);
+ }
+
+ /**
+ * Begins a recording session and requests microphone permissions if not already granted
+ * The microphone recording indicator will appear on the browser tab, but the status will be "paused"
+ * @param {string} [deviceId] if no device provided, default device will be used
+ * @returns {Promise<true>}
+ */
+ async begin(deviceId) {
+ if (this.processor) {
+ throw new Error(
+ `Already connected: please call .end() to start a new session`,
+ );
+ }
+
+ if (
+ !navigator.mediaDevices ||
+ !('getUserMedia' in navigator.mediaDevices)
+ ) {
+ throw new Error('Could not request user media');
+ }
+ try {
+ const config = { audio: true };
+ if (deviceId) {
+ config.audio = { deviceId: { exact: deviceId } };
+ }
+ this.stream = await navigator.mediaDevices.getUserMedia(config);
+ } catch (err) {
+ throw new Error('Could not start media stream');
+ }
+
+ const context = new AudioContext({ sampleRate: this.sampleRate });
+ const source = context.createMediaStreamSource(this.stream);
+ // Load and execute the module script.
+ try {
+ await context.audioWorklet.addModule(this.scriptSrc);
+ } catch (e) {
+ console.error(e);
+ throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`);
+ }
+ const processor = new AudioWorkletNode(context, 'audio_processor');
+ processor.port.onmessage = (e) => {
+ const { event, id, data } = e.data;
+ if (event === 'receipt') {
+ this.eventReceipts[id] = data;
+ } else if (event === 'chunk') {
+ if (this._chunkProcessorSize) {
+ const buffer = this._chunkProcessorBuffer;
+ this._chunkProcessorBuffer = {
+ raw: WavPacker.mergeBuffers(buffer.raw, data.raw),
+ mono: WavPacker.mergeBuffers(buffer.mono, data.mono),
+ };
+ if (
+ this._chunkProcessorBuffer.mono.byteLength >=
+ this._chunkProcessorSize
+ ) {
+ this._chunkProcessor(this._chunkProcessorBuffer);
+ this._chunkProcessorBuffer = {
+ raw: new ArrayBuffer(0),
+ mono: new ArrayBuffer(0),
+ };
+ }
+ } else {
+ this._chunkProcessor(data);
+ }
+ }
+ };
+
+ const node = source.connect(processor);
+ const analyser = context.createAnalyser();
+ analyser.fftSize = 8192;
+ analyser.smoothingTimeConstant = 0.1;
+ node.connect(analyser);
+ if (this.outputToSpeakers) {
+ // eslint-disable-next-line no-console
+ console.warn(
+ 'Warning: Output to speakers may affect sound quality,\n' +
+ 'especially due to system audio feedback preventative measures.\n' +
+ 'Use only for debugging.',
+ );
+ analyser.connect(context.destination);
+ }
+
+ this.source = source;
+ this.node = node;
+ this.analyser = analyser;
+ this.processor = processor;
+ return true;
+ }
+
+ /**
+ * Gets the current frequency domain data from the recording track
+ * @param {"frequency"|"music"|"voice"} [analysisType]
+ * @param {number} [minDecibels] default -100
+ * @param {number} [maxDecibels] default -30
+ * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
+ */
+ getFrequencies(
+ analysisType = 'frequency',
+ minDecibels = -100,
+ maxDecibels = -30,
+ ) {
+ if (!this.processor) {
+ throw new Error('Session ended: please call .begin() first');
+ }
+ return AudioAnalysis.getFrequencies(
+ this.analyser,
+ this.sampleRate,
+ null,
+ analysisType,
+ minDecibels,
+ maxDecibels,
+ );
+ }
+
+ /**
+ * Pauses the recording
+ * Keeps microphone stream open but halts storage of audio
+ * @returns {Promise<true>}
+ */
+ async pause() {
+ if (!this.processor) {
+ throw new Error('Session ended: please call .begin() first');
+ } else if (!this.recording) {
+ throw new Error('Already paused: please call .record() first');
+ }
+ if (this._chunkProcessorBuffer.raw.byteLength) {
+ this._chunkProcessor(this._chunkProcessorBuffer);
+ }
+ this.log('Pausing ...');
+ await this._event('stop');
+ this.recording = false;
+ return true;
+ }
+
+ /**
+ * Start recording stream and storing to memory from the connected audio source
+ * @param {(data: { mono: Int16Array; raw: Int16Array }) => any} [chunkProcessor]
+ * @param {number} [chunkSize] chunkProcessor will not be triggered until this size threshold is met in mono audio
+ * @returns {Promise<true>}
+ */
+ async record(chunkProcessor = () => {}, chunkSize = 8192) {
+ if (!this.processor) {
+ throw new Error('Session ended: please call .begin() first');
+ } else if (this.recording) {
+ throw new Error('Already recording: please call .pause() first');
+ } else if (typeof chunkProcessor !== 'function') {
+ throw new Error(`chunkProcessor must be a function`);
+ }
+ this._chunkProcessor = chunkProcessor;
+ this._chunkProcessorSize = chunkSize;
+ this._chunkProcessorBuffer = {
+ raw: new ArrayBuffer(0),
+ mono: new ArrayBuffer(0),
+ };
+ this.log('Recording ...');
+ await this._event('start');
+ this.recording = true;
+ return true;
+ }
+
+ /**
+ * Clears the audio buffer and empties the stored recording
+ * @returns {Promise<true>}
+ */
+ async clear() {
+ if (!this.processor) {
+ throw new Error('Session ended: please call .begin() first');
+ }
+ await this._event('clear');
+ return true;
+ }
+
+ /**
+ * Reads the current audio stream data
+ * @returns {Promise<{meanValues: Float32Array, channels: Array<Float32Array>}>}
+ */
+ async read() {
+ if (!this.processor) {
+ throw new Error('Session ended: please call .begin() first');
+ }
+ this.log('Reading ...');
+ const result = await this._event('read');
+ return result;
+ }
+
+ /**
+ * Saves the current audio stream to a file
+ * @param {boolean} [force] Force saving while still recording
+ * @returns {Promise<import('./wav_packer.js').WavPackerAudioType>}
+ */
+ async save(force = false) {
+ if (!this.processor) {
+ throw new Error('Session ended: please call .begin() first');
+ }
+ if (!force && this.recording) {
+ throw new Error(
+ 'Currently recording: please call .pause() first, or call .save(true) to force',
+ );
+ }
+ this.log('Exporting ...');
+ const exportData = await this._event('export');
+ const packer = new WavPacker();
+ const result = packer.pack(this.sampleRate, exportData.audio);
+ return result;
+ }
+
+ /**
+ * Ends the current recording session and saves the result
+ * @returns {Promise<import('./wav_packer.js').WavPackerAudioType>}
+ */
+ async end() {
+ if (!this.processor) {
+ throw new Error('Session ended: please call .begin() first');
+ }
+
+ const _processor = this.processor;
+
+ this.log('Stopping ...');
+ await this._event('stop');
+ this.recording = false;
+ const tracks = this.stream.getTracks();
+ tracks.forEach((track) => track.stop());
+
+ this.log('Exporting ...');
+ const exportData = await this._event('export', {}, _processor);
+
+ this.processor.disconnect();
+ this.source.disconnect();
+ this.node.disconnect();
+ this.analyser.disconnect();
+ this.stream = null;
+ this.processor = null;
+ this.source = null;
+ this.node = null;
+
+ const packer = new WavPacker();
+ const result = packer.pack(this.sampleRate, exportData.audio);
+ return result;
+ }
+
+ /**
+ * Performs a full cleanup of WavRecorder instance
+ * Stops actively listening via microphone and removes existing listeners
+ * @returns {Promise<true>}
+ */
+ async quit() {
+ this.listenForDeviceChange(null);
+ if (this.processor) {
+ await this.end();
+ }
+ return true;
+ }
+}
+
+globalThis.WavRecorder = WavRecorder;
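+
+// A sketch of the full recording lifecycle; the chunk handler body and the
+// 24kHz sample rate are illustrative assumptions:
+//
+//   const recorder = new WavRecorder({ sampleRate: 24000 });
+//   await recorder.begin();                      // opens the microphone
+//   await recorder.record((data) => {
+//     // data.mono / data.raw are ArrayBuffers of Int16 PCM
+//     sendToServer(data.mono);                   // sendToServer is hypothetical
+//   });
+//   await recorder.pause();
+//   const wav = await recorder.save();           // WavPackerAudioType
+//   await recorder.end();                        // releases the microphone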
diff --git a/web/src/lib/wavtools/lib/wav_stream_player.js b/web/src/lib/wavtools/lib/wav_stream_player.js
new file mode 100644
index 00000000..500eff6c
--- /dev/null
+++ b/web/src/lib/wavtools/lib/wav_stream_player.js
@@ -0,0 +1,160 @@
+import { StreamProcessorSrc } from './worklets/stream_processor.js';
+import { AudioAnalysis } from './analysis/audio_analysis.js';
+
+/**
+ * Plays audio streams received in raw PCM16 chunks from the browser
+ * @class
+ */
+export class WavStreamPlayer {
+ /**
+ * Creates a new WavStreamPlayer instance
+ * @param {{sampleRate?: number}} options
+ * @returns {WavStreamPlayer}
+ */
+ constructor({ sampleRate = 44100 } = {}) {
+ this.scriptSrc = StreamProcessorSrc;
+ this.sampleRate = sampleRate;
+ this.context = null;
+ this.stream = null;
+ this.analyser = null;
+ this.trackSampleOffsets = {};
+ this.interruptedTrackIds = {};
+ }
+
+ /**
+ * Connects the audio context and enables output to speakers
+ * @returns {Promise<true>}
+ */
+ async connect() {
+ this.context = new AudioContext({ sampleRate: this.sampleRate });
+ if (this.context.state === 'suspended') {
+ await this.context.resume();
+ }
+ try {
+ await this.context.audioWorklet.addModule(this.scriptSrc);
+ } catch (e) {
+ console.error(e);
+ throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`);
+ }
+ const analyser = this.context.createAnalyser();
+ analyser.fftSize = 8192;
+ analyser.smoothingTimeConstant = 0.1;
+ this.analyser = analyser;
+ return true;
+ }
+
+ /**
+ * Gets the current frequency domain data from the playing track
+ * @param {"frequency"|"music"|"voice"} [analysisType]
+ * @param {number} [minDecibels] default -100
+ * @param {number} [maxDecibels] default -30
+ * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
+ */
+ getFrequencies(
+ analysisType = 'frequency',
+ minDecibels = -100,
+ maxDecibels = -30
+ ) {
+ if (!this.analyser) {
+ throw new Error('Not connected, please call .connect() first');
+ }
+ return AudioAnalysis.getFrequencies(
+ this.analyser,
+ this.sampleRate,
+ null,
+ analysisType,
+ minDecibels,
+ maxDecibels
+ );
+ }
+
+ /**
+ * Starts audio streaming
+ * @private
+ * @returns {true}
+ */
+ _start() {
+ const streamNode = new AudioWorkletNode(this.context, 'stream_processor');
+ streamNode.connect(this.context.destination);
+ streamNode.port.onmessage = (e) => {
+ const { event } = e.data;
+ if (event === 'stop') {
+ streamNode.disconnect();
+ this.stream = null;
+ } else if (event === 'offset') {
+ const { requestId, trackId, offset } = e.data;
+ const currentTime = offset / this.sampleRate;
+ this.trackSampleOffsets[requestId] = { trackId, offset, currentTime };
+ }
+ };
+ this.analyser.disconnect();
+ streamNode.connect(this.analyser);
+ this.stream = streamNode;
+ return true;
+ }
+
+ /**
+ * Adds 16BitPCM data to the currently playing audio stream
+ * You can add chunks beyond the current play point and they will be queued for play
+ * @param {ArrayBuffer|Int16Array} arrayBuffer
+ * @param {string} [trackId]
+ * @returns {Int16Array}
+ */
+ add16BitPCM(arrayBuffer, trackId = 'default') {
+ if (typeof trackId !== 'string') {
+ throw new Error(`trackId must be a string`);
+ } else if (this.interruptedTrackIds[trackId]) {
+ return;
+ }
+ if (!this.stream) {
+ this._start();
+ }
+ let buffer;
+ if (arrayBuffer instanceof Int16Array) {
+ buffer = arrayBuffer;
+ } else if (arrayBuffer instanceof ArrayBuffer) {
+ buffer = new Int16Array(arrayBuffer);
+ } else {
+ throw new Error(`argument must be Int16Array or ArrayBuffer`);
+ }
+ this.stream.port.postMessage({ event: 'write', buffer, trackId });
+ return buffer;
+ }
+
+ /**
+ * Gets the offset (sample count) of the currently playing stream
+ * @param {boolean} [interrupt]
+ * @returns {Promise<{trackId: string|null, offset: number, currentTime: number}|null>}
+ */
+ async getTrackSampleOffset(interrupt = false) {
+ if (!this.stream) {
+ return null;
+ }
+ const requestId = crypto.randomUUID();
+ this.stream.port.postMessage({
+ event: interrupt ? 'interrupt' : 'offset',
+ requestId,
+ });
+ let trackSampleOffset;
+ while (!trackSampleOffset) {
+ trackSampleOffset = this.trackSampleOffsets[requestId];
+ await new Promise((r) => setTimeout(() => r(), 1));
+ }
+ const { trackId } = trackSampleOffset;
+ if (interrupt && trackId) {
+ this.interruptedTrackIds[trackId] = true;
+ }
+ return trackSampleOffset;
+ }
+
+ /**
+ * Interrupts the current stream and returns the sample offset of the audio
+ * @returns {Promise<{trackId: string|null, offset: number, currentTime: number}|null>}
+ */
+ async interrupt() {
+ return this.getTrackSampleOffset(true);
+ }
+}
+
+globalThis.WavStreamPlayer = WavStreamPlayer;
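+
+// A minimal usage sketch; `chunk` (an Int16Array or ArrayBuffer of 16-bit PCM
+// at 24kHz) is an illustrative assumption:
+//
+//   const player = new WavStreamPlayer({ sampleRate: 24000 });
+//   await player.connect();
+//   player.add16BitPCM(chunk, 'track-1');        // chunks are queued per track
+//   const offsets = await player.interrupt();    // halts playback, returns offset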
diff --git a/web/src/lib/wavtools/lib/worklets/audio_processor.js b/web/src/lib/wavtools/lib/worklets/audio_processor.js
new file mode 100644
index 00000000..61dd7ec9
--- /dev/null
+++ b/web/src/lib/wavtools/lib/worklets/audio_processor.js
@@ -0,0 +1,214 @@
+const AudioProcessorWorklet = `
+class AudioProcessor extends AudioWorkletProcessor {
+
+ constructor() {
+ super();
+ this.port.onmessage = this.receive.bind(this);
+ this.initialize();
+ }
+
+ initialize() {
+ this.foundAudio = false;
+ this.recording = false;
+ this.chunks = [];
+ }
+
+ /**
+ * Concatenates sampled chunks into channels
+ * Format is chunk[Left[], Right[]]
+ */
+ readChannelData(chunks, channel = -1, maxChannels = 9) {
+ let channelLimit;
+ if (channel !== -1) {
+ if (chunks[0] && chunks[0].length - 1 < channel) {
+ throw new Error(
+ \`Channel \${channel} out of range: max \${chunks[0].length}\`
+ );
+ }
+ channelLimit = channel + 1;
+ } else {
+ channel = 0;
+ channelLimit = Math.min(chunks[0] ? chunks[0].length : 1, maxChannels);
+ }
+ const channels = [];
+ for (let n = channel; n < channelLimit; n++) {
+ const length = chunks.reduce((sum, chunk) => {
+ return sum + chunk[n].length;
+ }, 0);
+ const buffers = chunks.map((chunk) => chunk[n]);
+ const result = new Float32Array(length);
+ let offset = 0;
+ for (let i = 0; i < buffers.length; i++) {
+ result.set(buffers[i], offset);
+ offset += buffers[i].length;
+ }
+ channels[n] = result;
+ }
+ return channels;
+ }
+
+ /**
+ * Combines parallel audio data into correct format,
+ * channels[Left[], Right[]] to float32Array[LRLRLRLR...]
+ */
+ formatAudioData(channels) {
+ if (channels.length === 1) {
+ // Simple case is only one channel
+ const float32Array = channels[0].slice();
+ const meanValues = channels[0].slice();
+ return { float32Array, meanValues };
+ } else {
+ const float32Array = new Float32Array(
+ channels[0].length * channels.length
+ );
+ const meanValues = new Float32Array(channels[0].length);
+ for (let i = 0; i < channels[0].length; i++) {
+ const offset = i * channels.length;
+ let meanValue = 0;
+ for (let n = 0; n < channels.length; n++) {
+ float32Array[offset + n] = channels[n][i];
+ meanValue += channels[n][i];
+ }
+ meanValues[i] = meanValue / channels.length;
+ }
+ return { float32Array, meanValues };
+ }
+ }
+
+ /**
+ * Converts 32-bit float data to 16-bit integers
+ */
+ floatTo16BitPCM(float32Array) {
+ const buffer = new ArrayBuffer(float32Array.length * 2);
+ const view = new DataView(buffer);
+ let offset = 0;
+ for (let i = 0; i < float32Array.length; i++, offset += 2) {
+ let s = Math.max(-1, Math.min(1, float32Array[i]));
+ view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
+ }
+ return buffer;
+ }
+
+ /**
+ * Retrieves the most recent amplitude values from the audio stream
+ * @param {number} channel
+ */
+ getValues(channel = -1) {
+ const channels = this.readChannelData(this.chunks, channel);
+ const { meanValues } = this.formatAudioData(channels);
+ return { meanValues, channels };
+ }
+
+ /**
+ * Exports chunks as an audio/wav file
+ */
+ export() {
+ const channels = this.readChannelData(this.chunks);
+ const { float32Array, meanValues } = this.formatAudioData(channels);
+ const audioData = this.floatTo16BitPCM(float32Array);
+ return {
+ meanValues: meanValues,
+ audio: {
+ bitsPerSample: 16,
+ channels: channels,
+ data: audioData,
+ },
+ };
+ }
+
+ receive(e) {
+ const { event, id } = e.data;
+ let receiptData = {};
+ switch (event) {
+ case 'start':
+ this.recording = true;
+ break;
+ case 'stop':
+ this.recording = false;
+ break;
+ case 'clear':
+ this.initialize();
+ break;
+ case 'export':
+ receiptData = this.export();
+ break;
+ case 'read':
+ receiptData = this.getValues();
+ break;
+ default:
+ break;
+ }
+ // Always send back receipt
+ this.port.postMessage({ event: 'receipt', id, data: receiptData });
+ }
+
+ sendChunk(chunk) {
+ const channels = this.readChannelData([chunk]);
+ const { float32Array, meanValues } = this.formatAudioData(channels);
+ const rawAudioData = this.floatTo16BitPCM(float32Array);
+ const monoAudioData = this.floatTo16BitPCM(meanValues);
+ this.port.postMessage({
+ event: 'chunk',
+ data: {
+ mono: monoAudioData,
+ raw: rawAudioData,
+ },
+ });
+ }
+
+ process(inputList, outputList, parameters) {
+ // Copy input to output (e.g. speakers)
+ // Note that this creates choppy sounds with Mac products
+ const sourceLimit = Math.min(inputList.length, outputList.length);
+ for (let inputNum = 0; inputNum < sourceLimit; inputNum++) {
+ const input = inputList[inputNum];
+ const output = outputList[inputNum];
+ const channelCount = Math.min(input.length, output.length);
+ for (let channelNum = 0; channelNum < channelCount; channelNum++) {
+ input[channelNum].forEach((sample, i) => {
+ output[channelNum][i] = sample;
+ });
+ }
+ }
+ const inputs = inputList[0];
+ // There's latency at the beginning of a stream before recording starts
+ // Make sure we actually receive audio data before we start storing chunks
+ let sliceIndex = 0;
+ if (!this.foundAudio) {
+ for (const channel of inputs) {
+ sliceIndex = 0; // reset for each channel
+ if (this.foundAudio) {
+ break;
+ }
+ if (channel) {
+ for (const value of channel) {
+ if (value !== 0) {
+ // find only one non-zero entry in any channel
+ this.foundAudio = true;
+ break;
+ } else {
+ sliceIndex++;
+ }
+ }
+ }
+ }
+ }
+ if (inputs && inputs[0] && this.foundAudio && this.recording) {
+ // We need to copy the TypedArray, because the \`process\`
+ // internals will reuse the same buffer to hold each input
+ const chunk = inputs.map((input) => input.slice(sliceIndex));
+ this.chunks.push(chunk);
+ this.sendChunk(chunk);
+ }
+ return true;
+ }
+}
+
+registerProcessor('audio_processor', AudioProcessor);
+`;
+
+const script = new Blob([AudioProcessorWorklet], {
+ type: 'application/javascript',
+});
+const src = URL.createObjectURL(script);
+export const AudioProcessorSrc = src;
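+
+// AudioProcessorSrc is a Blob URL for the worklet above; WavRecorder consumes
+// it roughly as:
+//
+//   await context.audioWorklet.addModule(AudioProcessorSrc);
+//   const processor = new AudioWorkletNode(context, 'audio_processor');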
diff --git a/web/src/lib/wavtools/lib/worklets/stream_processor.js b/web/src/lib/wavtools/lib/worklets/stream_processor.js
new file mode 100644
index 00000000..d3c794a8
--- /dev/null
+++ b/web/src/lib/wavtools/lib/worklets/stream_processor.js
@@ -0,0 +1,96 @@
+export const StreamProcessorWorklet = `
+class StreamProcessor extends AudioWorkletProcessor {
+ constructor() {
+ super();
+ this.hasStarted = false;
+ this.hasInterrupted = false;
+ this.outputBuffers = [];
+ this.bufferLength = 128;
+ this.write = { buffer: new Float32Array(this.bufferLength), trackId: null };
+ this.writeOffset = 0;
+ this.trackSampleOffsets = {};
+ this.port.onmessage = (event) => {
+ if (event.data) {
+ const payload = event.data;
+ if (payload.event === 'write') {
+ const int16Array = payload.buffer;
+ const float32Array = new Float32Array(int16Array.length);
+ for (let i = 0; i < int16Array.length; i++) {
+ float32Array[i] = int16Array[i] / 0x8000; // Convert Int16 to Float32
+ }
+ this.writeData(float32Array, payload.trackId);
+ } else if (
+ payload.event === 'offset' ||
+ payload.event === 'interrupt'
+ ) {
+ const requestId = payload.requestId;
+ const trackId = this.write.trackId;
+ const offset = this.trackSampleOffsets[trackId] || 0;
+ this.port.postMessage({
+ event: 'offset',
+ requestId,
+ trackId,
+ offset,
+ });
+ if (payload.event === 'interrupt') {
+ this.hasInterrupted = true;
+ }
+ } else {
+ throw new Error(\`Unhandled event "\${payload.event}"\`);
+ }
+ }
+ };
+ }
+
+ writeData(float32Array, trackId = null) {
+ let { buffer } = this.write;
+ let offset = this.writeOffset;
+ for (let i = 0; i < float32Array.length; i++) {
+ buffer[offset++] = float32Array[i];
+ if (offset >= buffer.length) {
+ this.outputBuffers.push(this.write);
+ this.write = { buffer: new Float32Array(this.bufferLength), trackId };
+ buffer = this.write.buffer;
+ offset = 0;
+ }
+ }
+ this.writeOffset = offset;
+ return true;
+ }
+
+ process(inputs, outputs, parameters) {
+ const output = outputs[0];
+ const outputChannelData = output[0];
+ const outputBuffers = this.outputBuffers;
+ if (this.hasInterrupted) {
+ this.port.postMessage({ event: 'stop' });
+ return false;
+ } else if (outputBuffers.length) {
+ this.hasStarted = true;
+ const { buffer, trackId } = outputBuffers.shift();
+ for (let i = 0; i < outputChannelData.length; i++) {
+ outputChannelData[i] = buffer[i] || 0;
+ }
+ if (trackId) {
+ this.trackSampleOffsets[trackId] =
+ this.trackSampleOffsets[trackId] || 0;
+ this.trackSampleOffsets[trackId] += buffer.length;
+ }
+ return true;
+ } else if (this.hasStarted) {
+ this.port.postMessage({ event: 'stop' });
+ return false;
+ } else {
+ return true;
+ }
+ }
+}
+
+registerProcessor('stream_processor', StreamProcessor);
+`;
+
+const script = new Blob([StreamProcessorWorklet], {
+ type: 'application/javascript',
+});
+const src = URL.createObjectURL(script);
+export const StreamProcessorSrc = src;
diff --git a/web/src/utils/conversation_config.js b/web/src/utils/conversation_config.js
new file mode 100644
index 00000000..bb9a0496
--- /dev/null
+++ b/web/src/utils/conversation_config.js
@@ -0,0 +1,16 @@
+export const instructions = `System settings:
+Tool use: enabled.
+
+Instructions:
+- You are an artificial intelligence agent responsible for helping test realtime voice capabilities
+- Please make sure to respond with a helpful voice via audio
+- Be kind, helpful, and courteous
+- It is okay to ask the user questions
+- Use tools and functions you have available liberally; it is part of the training apparatus
+- Be open to exploration and conversation
+- Remember: this is just for fun and testing!
+
+Personality:
+- Be upbeat and genuine
+- Try speaking quickly as if excited
+`;
diff --git a/web/src/utils/wav_player.js b/web/src/utils/wav_player.js
new file mode 100644
index 00000000..32dc6eff
--- /dev/null
+++ b/web/src/utils/wav_player.js
@@ -0,0 +1,27 @@
+// Plays a PCM16 speech stream
+export const playPCM16 = (pcm16Array, sampleRate = 44100) => {
+ try {
+ // Create an AudioContext
+ const audioContext = new (window.AudioContext || window.webkitAudioContext)();
+
+ // Convert the Int16Array to a Float32Array (the Web Audio API uses Float32)
+ let float32Array = new Float32Array(pcm16Array.length);
+ for (let i = 0; i < pcm16Array.length; i++) {
+ float32Array[i] = pcm16Array[i] / 32768; // Int16 to Float32
+ }
+
+ // Create an AudioBuffer
+ const audioBuffer = audioContext.createBuffer(1, float32Array.length, sampleRate); // mono (single channel)
+ audioBuffer.getChannelData(0).set(float32Array); // Set the audio data
+
+ // Create an AudioBufferSourceNode and play the audio
+ const source = audioContext.createBufferSource();
+ source.buffer = audioBuffer;
+ source.connect(audioContext.destination); // Connect to the speakers
+ source.start(); // Start playback
+ return source;
+ } catch (e) {
+ console.warn(e);
+ return null;
+ }
+};
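+
+// A minimal usage sketch; `pcm16` (an Int16Array of decoded speech at 24kHz)
+// is an illustrative assumption:
+//
+//   const source = playPCM16(pcm16, 24000);
+//   source?.stop(); // stop playback early if needed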
diff --git a/web/src/utils/wav_renderer.js b/web/src/utils/wav_renderer.js
new file mode 100644
index 00000000..7a18e128
--- /dev/null
+++ b/web/src/utils/wav_renderer.js
@@ -0,0 +1,81 @@
+const dataMap = new WeakMap();
+
+const normalizeArray = (data, m, downsamplePeaks = false, memoize = false) => {
+ let cache, mKey, dKey;
+ if (memoize) {
+ mKey = m.toString();
+ dKey = downsamplePeaks.toString();
+ cache = dataMap.has(data) ? dataMap.get(data) : {};
+ dataMap.set(data, cache);
+ cache[mKey] = cache[mKey] || {};
+ if (cache[mKey][dKey]) {
+ return cache[mKey][dKey];
+ }
+ }
+ const n = data.length;
+ const result = new Array(m);
+ if (m <= n) {
+ // Downsampling
+ result.fill(0);
+ const count = new Array(m).fill(0);
+ for (let i = 0; i < n; i++) {
+ const index = Math.floor(i * (m / n));
+ if (downsamplePeaks) {
+ // take highest result in the set
+ result[index] = Math.max(result[index], Math.abs(data[i]));
+ } else {
+ result[index] += Math.abs(data[i]);
+ }
+ count[index]++;
+ }
+ if (!downsamplePeaks) {
+ for (let i = 0; i < result.length; i++) {
+ result[i] = result[i] / count[i];
+ }
+ }
+ } else {
+ for (let i = 0; i < m; i++) {
+ const index = (i * (n - 1)) / (m - 1);
+ const low = Math.floor(index);
+ const high = Math.ceil(index);
+ const t = index - low;
+ if (high >= n) {
+ result[i] = data[n - 1];
+ } else {
+ result[i] = data[low] * (1 - t) + data[high] * t;
+ }
+ }
+ }
+ if (memoize) {
+ cache[mKey][dKey] = result;
+ }
+ return result;
+};
+
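+// Draws `data` (e.g. the `values` from AudioAnalysis.getFrequencies) as bars
+// on a canvas. A usage sketch; `canvas`, `ctx` and `recorder` are illustrative:
+//
+//   const freq = recorder.getFrequencies('voice');
+//   WavRenderer.drawBars(canvas, ctx, freq.values, '#0099ff', 10, 0, 8);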
+export const WavRenderer = {
+ drawBars: (canvas, ctx, data, color, pointCount = 0, barWidth = 0, barSpacing = 0, center = false) => {
+ pointCount = Math.floor(
+ Math.min(
+ pointCount,
+ (canvas.width - barSpacing) / (Math.max(barWidth, 1) + barSpacing)
+ )
+ );
+ if (!pointCount) {
+ pointCount = Math.floor(
+ (canvas.width - barSpacing) / (Math.max(barWidth, 1) + barSpacing)
+ );
+ }
+ if (!barWidth) {
+ barWidth = (canvas.width - barSpacing) / pointCount - barSpacing;
+ }
+ const points = normalizeArray(data, pointCount, true);
+ for (let i = 0; i < pointCount; i++) {
+ const amplitude = Math.abs(points[i]);
+ const height = Math.max(1, amplitude * canvas.height);
+ const x = barSpacing + i * (barWidth + barSpacing);
+ const y = center ? (canvas.height - height) / 2 : canvas.height - height;
+ ctx.fillStyle = color;
+ ctx.fillRect(x, y, barWidth, height);
+ }
+ },
+};
\ No newline at end of file
diff --git a/web/src/views/Test.vue b/web/src/views/Test.vue
index 14c82748..7d7bfedc 100644
--- a/web/src/views/Test.vue
+++ b/web/src/views/Test.vue
@@ -1,307 +1,471 @@
-
-
-
-
+
+
+
+

+
realtime console
+
-
-
-
-
+
+
+
+
+
events
+
+
+ awaiting connection...
+
+
+
+
+ {{ formatTime(realtimeEvent.time) }}
+
+
+
+
+
+
+ {{ realtimeEvent.event.type === 'error'
+ ? 'error!'
+ : realtimeEvent.source }}
+
+
+
+ {{ realtimeEvent.event.type }}
+ {{ realtimeEvent.count ? `(${realtimeEvent.count})` : '' }}
+
+
+
+ {{ JSON.stringify(realtimeEvent.event, null, 2) }}
+
+
+
+
+
+
+
+
conversation
+
+
+ awaiting connection...
+
+
+
+
+
+ {{
+ (conversationItem.role || conversationItem.type).replaceAll(
+ '_',
+ ' '
+ )
+ }}
+
+
+
+
+
+
+
+
+ {{ conversationItem.formatted.output }}
+
+
+
+ {{ conversationItem.formatted.tool.name }}(
+ {{ conversationItem.formatted.tool.arguments }})
+
+
+ {{
+ conversationItem.formatted.transcript ||
+ (conversationItem.formatted.audio?.length
+ ? '(awaiting transcript)'
+ : conversationItem.formatted.text || '(item sent)')
+ }}
+
+
+ {{
+ conversationItem.formatted.transcript ||
+ conversationItem.formatted.text ||
+ '(truncated)'
+ }}
+
+
+
+
+
+
+
+
+
+ {{isConnected ? '断开连接' : '连接对话'}}
+
+
+ 开始讲话
+
-
-
-
-
-
-
-
+
-
\ No newline at end of file
diff --git a/web/src/views/Test2.vue b/web/src/views/Test2.vue
index ae198da9..447d0ca2 100644
--- a/web/src/views/Test2.vue
+++ b/web/src/views/Test2.vue
@@ -81,7 +81,7 @@ onMounted(() => {
x += barWidth + 2;
}
- requestAnimationFrame(draw);
+ //requestAnimationFrame(draw);
}
draw();
diff --git a/web/src/views/Test3.vue b/web/src/views/Test3.vue
new file mode 100644
index 00000000..da2492a2
--- /dev/null
+++ b/web/src/views/Test3.vue
@@ -0,0 +1,184 @@
+
+
+ 开始语音对话
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file