From a90090c162c9e5db797b244052666105ea285257 Mon Sep 17 00:00:00 2001
From: dakai
Date: Sun, 13 Oct 2024 14:29:57 +0800
Subject: [PATCH 1/6] feat: add PlayIcon and StopPlayIcon to chat component

---
 app/components/chat.tsx | 68 +++++++++++++++++++++++++++++++++++------
 app/icons/play.svg      |  1 +
 app/icons/stop.svg      |  1 +
 app/utils.ts            | 13 ++++++++
 4 files changed, 74 insertions(+), 9 deletions(-)
 create mode 100644 app/icons/play.svg
 create mode 100644 app/icons/stop.svg

diff --git a/app/components/chat.tsx b/app/components/chat.tsx
index 3d519dee7..eb2848f08 100644
--- a/app/components/chat.tsx
+++ b/app/components/chat.tsx
@@ -1227,16 +1227,19 @@ function _Chat() {
         });
       }
       setSpeechStatus(true);
-      ttsPlayer
-        .play(audioBuffer, () => {
+      try {
+        await ttsPlayer.play(audioBuffer, () => {
           setSpeechStatus(false);
-        })
-        .catch((e) => {
-          console.error("[OpenAI Speech]", e);
-          showToast(prettyObject(e));
-          setSpeechStatus(false);
-        })
-        .finally(() => setSpeechLoading(false));
+        });
+        const audioFile = new Blob([audioBuffer], { type: "audio/wav" });
+        const url = uploadAudio(audioFile);
+      } catch (e) {
+        console.error("[OpenAI Speech]", e);
+        showToast(prettyObject(e));
+        setSpeechStatus(false);
+      } finally {
+        setSpeechLoading(false);
+      }
     }
   }
 
@@ -1505,6 +1508,53 @@ function _Chat() {
     setAttachImages(images);
   }
 
+  async function uploadAudio(file: Blob) {
+    const audioUrl: string = await uploadImageRemote(file);
+    console.log("audioUrl: ", audioUrl);
+    //const images: string[] = [];
+    //images.push(...attachImages);
+
+    //images.push(
+    //  ...(await new Promise((res, rej) => {
+    //    const fileInput = document.createElement("input");
+    //    fileInput.type = "file";
+    //    fileInput.accept =
+    //      "image/png, image/jpeg, image/webp, image/heic, image/heif";
+    //    fileInput.multiple = true;
+    //    fileInput.onchange = (event: any) => {
+    //      setUploading(true);
+    //      const files = event.target.files;
+    //      const imagesData: string[] = [];
+    //      for (let i = 0; i < files.length; i++) {
+    //        const file = event.target.files[i];
+    //        uploadImageRemote(file)
+    //          .then((dataUrl) => {
+    //            imagesData.push(dataUrl);
+    //            if (
+    //              imagesData.length === 3 ||
+    //              imagesData.length === files.length
+    //            ) {
+    //              setUploading(false);
+    //              res(imagesData);
+    //            }
+    //          })
+    //          .catch((e) => {
+    //            setUploading(false);
+    //            rej(e);
+    //          });
+    //      }
+    //    };
+    //    fileInput.click();
+    //  })),
+    //);
+
+    //const imagesLength = images.length;
+    //if (imagesLength > 3) {
+    //  images.splice(3, imagesLength - 3);
+    //}
+    //setAttachImages(images);
+  }
+
   // 快捷键 shortcut keys
   const [showShortcutKeyModal, setShowShortcutKeyModal] = useState(false);
 
diff --git a/app/icons/play.svg b/app/icons/play.svg
new file mode 100644
index 000000000..64e3efca4
--- /dev/null
+++ b/app/icons/play.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/app/icons/stop.svg b/app/icons/stop.svg
new file mode 100644
index 000000000..9037b3390
--- /dev/null
+++ b/app/icons/stop.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/app/utils.ts b/app/utils.ts
index 6b2f65952..b6cb6f6d9 100644
--- a/app/utils.ts
+++ b/app/utils.ts
@@ -250,6 +250,19 @@ export function getMessageImages(message: RequestMessage): string[] {
   return urls;
 }
 
+//export function getMessageAudio(message: RequestMessage): string[] {
+//  if (typeof message.content === "string") {
+//    return [];
+//  }
+//  const urls: string[] = [];
+//  for (const c of message.content) {
+//    if (c.type === "image_url") {
+//      urls.push(c.image_url?.url ?? "");
+//    }
+//  }
+//  return urls;
+//}
+
 export function isVisionModel(model: string) {
   // Note: This is a better way using the TypeScript feature instead of `&&` or `||` (ts v5.5.0-dev.20240314 I've been using)
 

From 853212cf135a97ba8023eda23ea8814a8d8363d8 Mon Sep 17 00:00:00 2001
From: Dakai Ou
Date: Sun, 13 Oct 2024 17:46:08 +0800
Subject: [PATCH 2/6] feat: add arrayBufferToWav utility for audio processing

---
 app/components/chat.tsx |  8 ++++---
 app/utils/audio.ts      | 51 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/app/components/chat.tsx b/app/components/chat.tsx
index eb2848f08..81858bc8a 100644
--- a/app/components/chat.tsx
+++ b/app/components/chat.tsx
@@ -117,7 +117,7 @@ import { MultimodalContent } from "../client/api";
 const localStorage = safeLocalStorage();
 
 import { ClientApi } from "../client/api";
-import { createTTSPlayer } from "../utils/audio";
+import { createTTSPlayer, arrayBufferToWav } from "../utils/audio";
 import { MsEdgeTTS, OUTPUT_FORMAT } from "../utils/ms_edge_tts";
 
 const ttsPlayer = createTTSPlayer();
@@ -1228,11 +1228,13 @@ function _Chat() {
       }
       setSpeechStatus(true);
       try {
+        const waveFile = arrayBufferToWav(audioBuffer);
+        const audioFile = new Blob([waveFile], { type: "audio/wav" });
+        const url = uploadAudio(audioFile);
+
         await ttsPlayer.play(audioBuffer, () => {
           setSpeechStatus(false);
         });
-        const audioFile = new Blob([audioBuffer], { type: "audio/wav" });
-        const url = uploadAudio(audioFile);
       } catch (e) {
         console.error("[OpenAI Speech]", e);
         showToast(prettyObject(e));
diff --git a/app/utils/audio.ts b/app/utils/audio.ts
index f6828c7aa..9af258812 100644
--- a/app/utils/audio.ts
+++ b/app/utils/audio.ts
@@ -43,3 +43,54 @@
 
   return { init, play, stop };
 }
+
+export function arrayBufferToWav(buffer: ArrayBuffer): ArrayBuffer {
+  const numOfChannels = 1; // Mono
+  const sampleRate = 24000; // 24kHz
+  const bitsPerSample = 16;
+
+  const bytesPerSample = bitsPerSample / 8;
+  const blockAlign = numOfChannels * bytesPerSample;
+  const byteRate = sampleRate * blockAlign;
+
+  // WAV header size is 44 bytes
+  const wavHeaderSize = 44;
+  const dataSize = buffer.byteLength;
+  const totalSize = wavHeaderSize + dataSize;
+
+  const wavBuffer = new ArrayBuffer(totalSize);
+  const view = new DataView(wavBuffer);
+
+  // RIFF chunk descriptor
+  writeString(view, 0, "RIFF");
+  view.setUint32(4, totalSize - 8, true); // File size minus RIFF header
+  writeString(view, 8, "WAVE");
+
+  // FMT sub-chunk
+  writeString(view, 12, "fmt ");
+  view.setUint32(16, 16, true); // Sub-chunk size (16 for PCM)
+  view.setUint16(20, 1, true); // Audio format (1 for PCM)
+  view.setUint16(22, numOfChannels, true); // Number of channels
+  view.setUint32(24, sampleRate, true); // Sample rate
+  view.setUint32(28, byteRate, true); // Byte rate
+  view.setUint16(32, blockAlign, true); // Block align
+  view.setUint16(34, bitsPerSample, true); // Bits per sample
+
+  // Data sub-chunk
+  writeString(view, 36, "data");
+  view.setUint32(40, dataSize, true); // Data size
+
+  // Write the PCM samples
+  const audioData = new Uint8Array(buffer);
+  const wavData = new Uint8Array(wavBuffer);
+  wavData.set(audioData, wavHeaderSize);
+
+  return wavBuffer;
+}
+
+// Helper function to write a string to the DataView
+function writeString(view: DataView, offset: number, string: string) {
+  for (let i = 0; i < string.length; i++) {
+    view.setUint8(offset + i, string.charCodeAt(i));
+  }
+}
From ad1ce9293707012a243f59a547614d8f1889e50a Mon Sep 17 00:00:00 2001
From: dakai
Date: Mon, 14 Oct 2024 02:00:46 +0800
Subject: [PATCH 3/6] style: update audio element styling in chat module

---
 app/client/api.ts               |  1 +
 app/components/chat.module.scss | 14 ++++--
 app/components/chat.tsx         | 87 +++++++++++++--------------------
 app/styles/globals.scss         |  9 ++++
 package.json                    |  4 +-
 5 files changed, 57 insertions(+), 58 deletions(-)

diff --git a/app/client/api.ts b/app/client/api.ts
index 7a242ea99..a3da99ba7 100644
--- a/app/client/api.ts
+++ b/app/client/api.ts
@@ -39,6 +39,7 @@ export interface MultimodalContent {
 export interface RequestMessage {
   role: MessageRole;
   content: string | MultimodalContent[];
+  audio_url?: string;
 }
 
 export interface LLMConfig {
diff --git a/app/components/chat.module.scss b/app/components/chat.module.scss
index 73542fc67..597299994 100644
--- a/app/components/chat.module.scss
+++ b/app/components/chat.module.scss
@@ -430,7 +430,7 @@
 
 .chat-message-item {
   box-sizing: border-box;
-  max-width: 100%;
+  max-width: 300px;
   margin-top: 10px;
   border-radius: 10px;
   background-color: rgba(0, 0, 0, 0.05);
@@ -443,6 +443,10 @@
   transition: all ease 0.3s;
 }
 
+.audio-message {
+  min-width: 350px;
+}
+
 .chat-message-item-image {
   width: 100%;
   margin-top: 10px;
@@ -471,6 +475,10 @@
   border: rgba($color: #888, $alpha: 0.2) 1px solid;
 }
 
+.chat-message-item-audio {
+  margin-top: 10px;
+  width: 100%;
+}
 
 @media only screen and (max-width: 600px) {
   $calc-image-width: calc(100vw/3*2/var(--image-count));
@@ -519,7 +527,7 @@
     background-color: var(--second);
 
     &:hover {
-      min-width: 0;
+      //min-width: 350px;
     }
   }
 
@@ -693,4 +701,4 @@
 .shortcut-key span {
   font-size: 12px;
   color: var(--black);
-}
\ No newline at end of file
+}
diff --git a/app/components/chat.tsx b/app/components/chat.tsx
index 81858bc8a..1f8a13e4b 100644
--- a/app/components/chat.tsx
+++ b/app/components/chat.tsx
@@ -1121,6 +1121,15 @@ function _Chat() {
     );
   };
 
+  const updateMessageAudio = (msgId?: string, audio_url?: string) => {
+    chatStore.updateCurrentSession(
+      (session) =>
+        (session.messages = session.messages.map((m) =>
+          m.id === msgId ? { ...m, audio_url } : m,
+        )),
+    );
+  };
+
   const onDelete = (msgId: string) => {
     deleteMessage(msgId);
   };
@@ -1197,7 +1206,7 @@ function _Chat() {
   const accessStore = useAccessStore();
   const [speechStatus, setSpeechStatus] = useState(false);
   const [speechLoading, setSpeechLoading] = useState(false);
-  async function openaiSpeech(text: string) {
+  async function openaiSpeech(text: string): Promise<string | undefined> {
     if (speechStatus) {
       ttsPlayer.stop();
       setSpeechStatus(false);
@@ -1230,11 +1239,12 @@ function _Chat() {
       try {
         const waveFile = arrayBufferToWav(audioBuffer);
         const audioFile = new Blob([waveFile], { type: "audio/wav" });
-        const url = uploadAudio(audioFile);
+        const audioUrl: string = await uploadImageRemote(audioFile);
 
         await ttsPlayer.play(audioBuffer, () => {
           setSpeechStatus(false);
         });
+        return audioUrl;
       } catch (e) {
         console.error("[OpenAI Speech]", e);
         showToast(prettyObject(e));
@@ -1510,53 +1520,6 @@ function _Chat() {
     setAttachImages(images);
   }
 
-  async function uploadAudio(file: Blob) {
-    const audioUrl: string = await uploadImageRemote(file);
-    console.log("audioUrl: ", audioUrl);
-    //const images: string[] = [];
-    //images.push(...attachImages);
-
-    //images.push(
-    //  ...(await new Promise((res, rej) => {
-    //    const fileInput = document.createElement("input");
-    //    fileInput.type = "file";
-    //    fileInput.accept =
-    //      "image/png, image/jpeg, image/webp, image/heic, image/heif";
-    //    fileInput.multiple = true;
-    //    fileInput.onchange = (event: any) => {
-    //      setUploading(true);
-    //      const files = event.target.files;
-    //      const imagesData: string[] = [];
-    //      for (let i = 0; i < files.length; i++) {
-    //        const file = event.target.files[i];
-    //        uploadImageRemote(file)
-    //          .then((dataUrl) => {
-    //            imagesData.push(dataUrl);
-    //            if (
-    //              imagesData.length === 3 ||
-    //              imagesData.length === files.length
-    //            ) {
-    //              setUploading(false);
-    //              res(imagesData);
-    //            }
-    //          })
-    //          .catch((e) => {
-    //            setUploading(false);
-    //            rej(e);
-    //          });
-    //      }
-    //    };
-    //    fileInput.click();
-    //  })),
-    //);
-
-    //const imagesLength = images.length;
-    //if (imagesLength > 3) {
-    //  images.splice(3, imagesLength - 3);
-    //}
-    //setAttachImages(images);
-  }
-
   // 快捷键 shortcut keys
   const [showShortcutKeyModal, setShowShortcutKeyModal] = useState(false);
 
@@ -1845,9 +1808,12 @@ function _Chat() {
                             )
                           }
-                          onClick={() =>
-                            openaiSpeech(getMessageTextContent(message))
-                          }
+                          onClick={async () => {
+                            const url = await openaiSpeech(
+                              getMessageTextContent(message),
+                            );
+                            updateMessageAudio(message.id, url);
+                          }}
                         />
                       )}
@@ -1881,7 +1847,11 @@ function _Chat() {
                         ))}
                       </div>
                     )}
-
+
                   )}
+                  {message.audio_url && (
+
+                  )}
diff --git a/app/styles/globals.scss b/app/styles/globals.scss
index e8c10de3f..fa7c0f2f2 100644
--- a/app/styles/globals.scss
+++ b/app/styles/globals.scss
@@ -399,3 +399,12 @@ pre {
 .copyable {
   user-select: text;
 }
+
+audio {
+  height: 35px;
+}
+
+audio::-webkit-media-controls-play-button,
+  audio::-webkit-media-controls-panel {
+    background-color: none;
+  }
diff --git a/package.json b/package.json
index 8696f83b5..4c468e2ff 100644
--- a/package.json
+++ b/package.json
@@ -31,8 +31,8 @@
     "html-to-image": "^1.11.11",
     "idb-keyval": "^6.2.1",
     "lodash-es": "^4.17.21",
-    "mermaid": "^10.6.1",
     "markdown-to-txt": "^2.0.1",
+    "mermaid": "^10.6.1",
     "nanoid": "^5.0.3",
     "next": "^14.1.1",
     "node-fetch": "^3.3.1",
@@ -80,4 +80,4 @@
     "lint-staged/yaml": "^2.2.2"
   },
   "packageManager": "yarn@1.22.19"
-}
\ No newline at end of file
+}

From 39f5d263f9accef4f131f8d0d2e4f62e2f76067c Mon Sep 17 00:00:00 2001
From: dakai
Date: Mon, 14 Oct 2024 02:33:28 +0800
Subject: [PATCH 4/6] follow the rabbit to fix potential issues

---
 app/components/chat.tsx | 12 +++++-------
 app/styles/globals.scss |  9 +++++----
 app/utils.ts            | 13 -------------
 app/utils/audio.ts      |  3 +++
 4 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/app/components/chat.tsx b/app/components/chat.tsx
index 855b6bf3f..d3f568257 100644
--- a/app/components/chat.tsx
+++ b/app/components/chat.tsx
@@ -1122,12 +1122,11 @@ function _Chat() {
   };
 
   const updateMessageAudio = (msgId?: string, audio_url?: string) => {
-    chatStore.updateCurrentSession(
-      (session) =>
-        (session.messages = session.messages.map((m) =>
-          m.id === msgId ? { ...m, audio_url } : m,
-        )),
-    );
+    chatStore.updateCurrentSession((session) => {
+      session.messages = session.messages.map((m) =>
+        m.id === msgId ? { ...m, audio_url } : m,
+      );
+    });
   };
 
   const onDelete = (msgId: string) => {
     deleteMessage(msgId);
   };
@@ -1903,7 +1902,6 @@ function _Chat() {
                   )}
                   {message.audio_url && (