This commit is contained in:
Dakai 2024-11-07 20:58:33 +08:00 committed by GitHub
commit 8ff7dbe59d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 123 additions and 17 deletions

View File

@ -41,6 +41,7 @@ export interface MultimodalContent {
export interface RequestMessage { export interface RequestMessage {
role: MessageRole; role: MessageRole;
content: string | MultimodalContent[]; content: string | MultimodalContent[];
audio_url?: string;
} }
export interface LLMConfig { export interface LLMConfig {

View File

@ -443,6 +443,10 @@
transition: all ease 0.3s; transition: all ease 0.3s;
} }
.audio-message {
min-width: 350px;
}
.chat-message-item-image { .chat-message-item-image {
width: 100%; width: 100%;
margin-top: 10px; margin-top: 10px;
@ -471,6 +475,10 @@
border: rgba($color: #888, $alpha: 0.2) 1px solid; border: rgba($color: #888, $alpha: 0.2) 1px solid;
} }
.chat-message-item-audio {
margin-top: 10px;
width: 100%;
}
@media only screen and (max-width: 600px) { @media only screen and (max-width: 600px) {
$calc-image-width: calc(100vw/3*2/var(--image-count)); $calc-image-width: calc(100vw/3*2/var(--image-count));
@ -519,7 +527,7 @@
background-color: var(--second); background-color: var(--second);
&:hover { &:hover {
min-width: 0; //min-width: 350px;
} }
} }

View File

@ -116,7 +116,7 @@ import { useAllModels } from "../utils/hooks";
import { MultimodalContent } from "../client/api"; import { MultimodalContent } from "../client/api";
import { ClientApi } from "../client/api"; import { ClientApi } from "../client/api";
import { createTTSPlayer } from "../utils/audio"; import { createTTSPlayer, arrayBufferToWav } from "../utils/audio";
import { MsEdgeTTS, OUTPUT_FORMAT } from "../utils/ms_edge_tts"; import { MsEdgeTTS, OUTPUT_FORMAT } from "../utils/ms_edge_tts";
import { isEmpty } from "lodash-es"; import { isEmpty } from "lodash-es";
@ -1132,6 +1132,14 @@ function _Chat() {
); );
}; };
// Attach (or clear) the generated speech audio URL on the message with the
// given id; every other message in the current session is left untouched.
const updateMessageAudio = (msgId?: string, audio_url?: string) => {
  chatStore.updateCurrentSession((session) => {
    const updated = session.messages.map((message) => {
      if (message.id !== msgId) {
        return message;
      }
      return { ...message, audio_url };
    });
    session.messages = updated;
  });
};
const onDelete = (msgId: string) => { const onDelete = (msgId: string) => {
deleteMessage(msgId); deleteMessage(msgId);
}; };
@ -1208,7 +1216,7 @@ function _Chat() {
const accessStore = useAccessStore(); const accessStore = useAccessStore();
const [speechStatus, setSpeechStatus] = useState(false); const [speechStatus, setSpeechStatus] = useState(false);
const [speechLoading, setSpeechLoading] = useState(false); const [speechLoading, setSpeechLoading] = useState(false);
async function openaiSpeech(text: string) { async function openaiSpeech(text: string): Promise<string | undefined> {
if (speechStatus) { if (speechStatus) {
ttsPlayer.stop(); ttsPlayer.stop();
setSpeechStatus(false); setSpeechStatus(false);
@ -1238,16 +1246,22 @@ function _Chat() {
}); });
} }
setSpeechStatus(true); setSpeechStatus(true);
ttsPlayer try {
.play(audioBuffer, () => { const waveFile = arrayBufferToWav(audioBuffer);
const audioFile = new Blob([waveFile], { type: "audio/wav" });
const audioUrl: string = await uploadImageRemote(audioFile);
await ttsPlayer.play(audioBuffer, () => {
setSpeechStatus(false); setSpeechStatus(false);
}) });
.catch((e) => { return audioUrl;
console.error("[OpenAI Speech]", e); } catch (e) {
console.error("[Speech Error]", e);
showToast(prettyObject(e)); showToast(prettyObject(e));
setSpeechStatus(false); setSpeechStatus(false);
}) } finally {
.finally(() => setSpeechLoading(false)); setSpeechLoading(false);
}
} }
} }
@ -1810,9 +1824,12 @@ function _Chat() {
<SpeakIcon /> <SpeakIcon />
) )
} }
onClick={() => onClick={async () => {
openaiSpeech(getMessageTextContent(message)) const url = await openaiSpeech(
} getMessageTextContent(message),
);
updateMessageAudio(message.id, url);
}}
/> />
)} )}
</> </>
@ -1847,7 +1864,11 @@ function _Chat() {
))} ))}
</div> </div>
)} )}
<div className={styles["chat-message-item"]}> <div
className={`${styles["chat-message-item"]} ${
message.audio_url ? styles["audio-message"] : ""
}`}
>
<Markdown <Markdown
key={message.streaming ? "loading" : "done"} key={message.streaming ? "loading" : "done"}
content={getMessageTextContent(message)} content={getMessageTextContent(message)}
@ -1896,6 +1917,16 @@ function _Chat() {
})} })}
</div> </div>
)} )}
{message.audio_url && (
<audio
preload="auto"
controls
className={styles["chat-message-item-audio"]}
>
<source type="audio/mp3" src={message.audio_url} />
Sorry, your browser does not support HTML5 audio.
</audio>
)}
</div> </div>
<div className={styles["chat-message-action-date"]}> <div className={styles["chat-message-action-date"]}>

1
app/icons/play.svg Normal file
View File

@ -0,0 +1 @@
<svg class="svg-icon" style="width: 1em; height: 1em;vertical-align: middle;fill: currentColor;overflow: hidden;" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg"><path d="M384 810.666667c-8.533333 0-12.8 0-21.333333-4.266667C349.866667 797.866667 341.333333 785.066667 341.333333 768L341.333333 256c0-17.066667 8.533333-29.866667 21.333333-38.4 12.8-8.533333 29.866667-8.533333 42.666667 0l384 256c12.8 8.533333 17.066667 21.333333 17.066667 34.133333s-8.533333 25.6-17.066667 34.133333l-384 256C401.066667 806.4 392.533333 810.666667 384 810.666667zM426.666667 337.066667l0 354.133333 264.533333-174.933333L426.666667 337.066667z" /></svg>

After

Width:  |  Height:  |  Size: 666 B

1
app/icons/stop.svg Normal file
View File

@ -0,0 +1 @@
<svg class="svg-icon" style="width: 1em; height: 1em;vertical-align: middle;fill: currentColor;overflow: hidden;" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg"><path d="M256 256l512 0 0 512-512 0 0-512Z" /></svg>

After

Width:  |  Height:  |  Size: 240 B

View File

@ -399,3 +399,13 @@ pre {
.copyable { .copyable {
user-select: text; user-select: text;
} }
audio {
height: 35px;
}
audio::-webkit-media-controls-play-button,
audio::-webkit-media-controls-panel,
audio::-moz-media-controls-play-button,
audio::-moz-media-controls-panel {
background: none;
}

View File

@ -43,3 +43,57 @@ export function createTTSPlayer(): TTSPlayer {
return { init, play, stop }; return { init, play, stop };
} }
/**
 * Wrap raw PCM sample data in a WAV (RIFF) container so it can be played by
 * an <audio> element or uploaded as a regular audio file.
 *
 * The defaults (mono, 24 kHz, 16-bit) match the PCM stream produced by the
 * TTS backend used by this app; callers with a different stream can override
 * them via `options` without breaking existing call sites.
 *
 * @param buffer  Raw little-endian PCM samples, with no header.
 * @param options Optional format overrides: numChannels, sampleRate, bitsPerSample.
 * @returns A new ArrayBuffer: a 44-byte canonical WAV header followed by the samples.
 */
export function arrayBufferToWav(
  buffer: ArrayBuffer,
  options?: {
    numChannels?: number;
    sampleRate?: number;
    bitsPerSample?: number;
  },
): ArrayBuffer {
  const numOfChannels = options?.numChannels ?? 1; // Mono by default
  const sampleRate = options?.sampleRate ?? 24000; // 24kHz by default
  const bitsPerSample = options?.bitsPerSample ?? 16;
  const bytesPerSample = bitsPerSample / 8;
  const blockAlign = numOfChannels * bytesPerSample;
  const byteRate = sampleRate * blockAlign;

  // The canonical PCM WAV header is always 44 bytes.
  const wavHeaderSize = 44;
  const dataSize = buffer.byteLength;
  const totalSize = wavHeaderSize + dataSize;

  const wavBuffer = new ArrayBuffer(totalSize);
  const view = new DataView(wavBuffer);

  // Local ASCII writer so this function has no external dependencies.
  const writeAscii = (offset: number, text: string) => {
    for (let i = 0; i < text.length; i++) {
      view.setUint8(offset + i, text.charCodeAt(i));
    }
  };

  // RIFF chunk descriptor
  writeAscii(0, "RIFF");
  view.setUint32(4, totalSize - 8, true); // File size minus the 8-byte RIFF header
  writeAscii(8, "WAVE");

  // "fmt " sub-chunk (PCM)
  writeAscii(12, "fmt ");
  view.setUint32(16, 16, true); // Sub-chunk size (16 for PCM)
  view.setUint16(20, 1, true); // Audio format (1 = uncompressed PCM)
  view.setUint16(22, numOfChannels, true); // Number of channels
  view.setUint32(24, sampleRate, true); // Sample rate
  view.setUint32(28, byteRate, true); // Byte rate
  view.setUint16(32, blockAlign, true); // Block align
  view.setUint16(34, bitsPerSample, true); // Bits per sample

  // "data" sub-chunk
  writeAscii(36, "data");
  view.setUint32(40, dataSize, true); // Data size

  // Copy the PCM samples verbatim after the header.
  new Uint8Array(wavBuffer).set(new Uint8Array(buffer), wavHeaderSize);

  return wavBuffer;
}
// Copy an ASCII string into `view`, one byte per character, starting at
// `offset`. Throws when the string would run past the end of the buffer.
function writeString(view: DataView, offset: number, string: string) {
  if (offset + string.length > view.byteLength) {
    throw new Error("String is too long for the available space in DataView");
  }
  let index = 0;
  while (index < string.length) {
    view.setUint8(offset + index, string.charCodeAt(index));
    index += 1;
  }
}