This commit is contained in:
Dakai 2024-11-07 20:58:33 +08:00 committed by GitHub
commit 8ff7dbe59d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 123 additions and 17 deletions

View File

@ -41,6 +41,7 @@ export interface MultimodalContent {
/**
 * Shape of a single chat message: who authored it, what it contains and,
 * optionally, where an audio version of it can be fetched.
 */
export interface RequestMessage {
/** Role of the message author. */
role: MessageRole;
/** Message body: either plain text or a list of multimodal content parts. */
content: string | MultimodalContent[];
/**
 * Optional URL of an audio file associated with this message.
 * NOTE(review): presumably the uploaded text-to-speech result that the chat
 * view plays back in an <audio> element — confirm against the chat component.
 */
audio_url?: string;
}
export interface LLMConfig {

View File

@ -443,6 +443,10 @@
transition: all ease 0.3s;
}
// Modifier added to a chat message bubble when the message carries an
// audio_url. Presumably the minimum width keeps the embedded audio player
// usable on short messages — confirm with design.
.audio-message {
min-width: 350px;
}
.chat-message-item-image {
width: 100%;
margin-top: 10px;
@ -471,6 +475,10 @@
border: rgba($color: #888, $alpha: 0.2) 1px solid;
}
// Audio player rendered inside a chat message: spans the full bubble width
// and is separated from the content above it by a small gap.
.chat-message-item-audio {
margin-top: 10px;
width: 100%;
}
@media only screen and (max-width: 600px) {
$calc-image-width: calc(100vw/3*2/var(--image-count));
@ -519,7 +527,7 @@
background-color: var(--second);
&:hover {
min-width: 0;
//min-width: 350px;
}
}
@ -693,4 +701,4 @@
.shortcut-key span {
font-size: 12px;
color: var(--black);
}
}

View File

@ -116,7 +116,7 @@ import { useAllModels } from "../utils/hooks";
import { MultimodalContent } from "../client/api";
import { ClientApi } from "../client/api";
import { createTTSPlayer } from "../utils/audio";
import { createTTSPlayer, arrayBufferToWav } from "../utils/audio";
import { MsEdgeTTS, OUTPUT_FORMAT } from "../utils/ms_edge_tts";
import { isEmpty } from "lodash-es";
@ -1132,6 +1132,14 @@ function _Chat() {
);
};
/**
 * Attach an audio URL to the message with the given id in the current
 * session.
 *
 * The call is a no-op when either argument is missing. The speech request
 * resolves to `undefined` when it fails, and without this guard such a result
 * would overwrite (and thereby remove) an audio URL that was stored earlier.
 *
 * @param msgId - Id of the message to update.
 * @param audio_url - URL of the audio file to store on that message.
 */
const updateMessageAudio = (msgId?: string, audio_url?: string) => {
  // Nothing to attach, or no message to attach it to: leave the session as is.
  if (!msgId || !audio_url) {
    return;
  }

  chatStore.updateCurrentSession((session) => {
    // Replace only the matching message; every other message is kept as-is.
    session.messages = session.messages.map((m) =>
      m.id === msgId ? { ...m, audio_url } : m,
    );
  });
};
const onDelete = (msgId: string) => {
deleteMessage(msgId);
};
@ -1208,7 +1216,7 @@ function _Chat() {
const accessStore = useAccessStore();
const [speechStatus, setSpeechStatus] = useState(false);
const [speechLoading, setSpeechLoading] = useState(false);
async function openaiSpeech(text: string) {
async function openaiSpeech(text: string): Promise<string | undefined> {
if (speechStatus) {
ttsPlayer.stop();
setSpeechStatus(false);
@ -1238,16 +1246,22 @@ function _Chat() {
});
}
setSpeechStatus(true);
ttsPlayer
.play(audioBuffer, () => {
try {
const waveFile = arrayBufferToWav(audioBuffer);
const audioFile = new Blob([waveFile], { type: "audio/wav" });
const audioUrl: string = await uploadImageRemote(audioFile);
await ttsPlayer.play(audioBuffer, () => {
setSpeechStatus(false);
})
.catch((e) => {
console.error("[OpenAI Speech]", e);
showToast(prettyObject(e));
setSpeechStatus(false);
})
.finally(() => setSpeechLoading(false));
});
return audioUrl;
} catch (e) {
console.error("[Speech Error]", e);
showToast(prettyObject(e));
setSpeechStatus(false);
} finally {
setSpeechLoading(false);
}
}
}
@ -1810,9 +1824,12 @@ function _Chat() {
<SpeakIcon />
)
}
onClick={() =>
openaiSpeech(getMessageTextContent(message))
}
onClick={async () => {
const url = await openaiSpeech(
getMessageTextContent(message),
);
updateMessageAudio(message.id, url);
}}
/>
)}
</>
@ -1847,7 +1864,11 @@ function _Chat() {
))}
</div>
)}
<div className={styles["chat-message-item"]}>
<div
className={`${styles["chat-message-item"]} ${
message.audio_url ? styles["audio-message"] : ""
}`}
>
<Markdown
key={message.streaming ? "loading" : "done"}
content={getMessageTextContent(message)}
@ -1896,6 +1917,16 @@ function _Chat() {
})}
</div>
)}
{message.audio_url && (
<audio
preload="auto"
controls
className={styles["chat-message-item-audio"]}
>
<source type="audio/mp3" src={message.audio_url} />
Sorry, your browser does not support HTML5 audio.
</audio>
)}
</div>
<div className={styles["chat-message-action-date"]}>

1
app/icons/play.svg Normal file
View File

@ -0,0 +1 @@
<svg class="svg-icon" style="width: 1em; height: 1em;vertical-align: middle;fill: currentColor;overflow: hidden;" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg"><path d="M384 810.666667c-8.533333 0-12.8 0-21.333333-4.266667C349.866667 797.866667 341.333333 785.066667 341.333333 768L341.333333 256c0-17.066667 8.533333-29.866667 21.333333-38.4 12.8-8.533333 29.866667-8.533333 42.666667 0l384 256c12.8 8.533333 17.066667 21.333333 17.066667 34.133333s-8.533333 25.6-17.066667 34.133333l-384 256C401.066667 806.4 392.533333 810.666667 384 810.666667zM426.666667 337.066667l0 354.133333 264.533333-174.933333L426.666667 337.066667z" /></svg>

After

Width:  |  Height:  |  Size: 666 B

1
app/icons/stop.svg Normal file
View File

@ -0,0 +1 @@
<svg class="svg-icon" style="width: 1em; height: 1em;vertical-align: middle;fill: currentColor;overflow: hidden;" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg"><path d="M256 256l512 0 0 512-512 0 0-512Z" /></svg>

After

Width:  |  Height:  |  Size: 240 B

View File

@ -399,3 +399,13 @@ pre {
.copyable {
user-select: text;
}
/* Global rule: every <audio> element is rendered with a fixed 35px height. */
audio {
height: 35px;
}
/*
 * Remove the default background of the native audio controls.
 *
 * The vendor-prefixed selectors are deliberately kept in separate rules: a
 * selector list is discarded as a whole when the browser does not recognise
 * one of its selectors, so combining -webkit- and -moz- pseudo-elements in a
 * single list would stop the rule from applying in any browser.
 */
audio::-webkit-media-controls-play-button,
audio::-webkit-media-controls-panel {
  background: none;
}

audio::-moz-media-controls-play-button,
audio::-moz-media-controls-panel {
  background: none;
}

View File

@ -43,3 +43,57 @@ export function createTTSPlayer(): TTSPlayer {
return { init, play, stop };
}
/**
 * Wrap raw PCM sample data in a 44-byte RIFF/WAVE header.
 *
 * The sample bytes are copied unchanged after the header; no resampling or
 * conversion is performed, so `options` must describe the layout the data
 * already has. When `options` is omitted the data is described as mono,
 * 24 kHz, 16-bit PCM.
 *
 * @param buffer - Raw PCM sample bytes.
 * @param options - Optional description of the PCM layout.
 * @param options.sampleRate - Samples per second (default 24000).
 * @param options.numOfChannels - Number of interleaved channels (default 1).
 * @param options.bitsPerSample - Bits per sample, a multiple of 8 (default 16).
 * @returns A new ArrayBuffer holding the header followed by the sample bytes.
 * @throws RangeError if an option is not a positive integer in its valid range.
 */
export function arrayBufferToWav(
  buffer: ArrayBuffer,
  options: {
    sampleRate?: number;
    numOfChannels?: number;
    bitsPerSample?: number;
  } = {},
): ArrayBuffer {
  const { sampleRate = 24000, numOfChannels = 1, bitsPerSample = 16 } = options;

  if (!Number.isInteger(sampleRate) || sampleRate <= 0) {
    throw new RangeError("sampleRate must be a positive integer");
  }
  // The channel count is stored in a 16-bit header field.
  if (
    !Number.isInteger(numOfChannels) ||
    numOfChannels <= 0 ||
    numOfChannels > 0xffff
  ) {
    throw new RangeError("numOfChannels must be an integer between 1 and 65535");
  }
  if (
    !Number.isInteger(bitsPerSample) ||
    bitsPerSample <= 0 ||
    bitsPerSample % 8 !== 0
  ) {
    throw new RangeError("bitsPerSample must be a positive multiple of 8");
  }

  const headerSize = 44;
  const payloadSize = buffer.byteLength;
  const bytesPerSample = bitsPerSample / 8;
  const blockAlign = numOfChannels * bytesPerSample; // bytes per sample frame
  const byteRate = sampleRate * blockAlign; // bytes per second of audio

  const wavBuffer = new ArrayBuffer(headerSize + payloadSize);
  const header = new DataView(wavBuffer);

  // RIFF chunk descriptor. The size field excludes the 8 bytes taken by the
  // "RIFF" tag and the size field itself. All numbers are little-endian.
  writeString(header, 0, "RIFF");
  header.setUint32(4, headerSize + payloadSize - 8, true);
  writeString(header, 8, "WAVE");

  // "fmt " sub-chunk describing the sample layout.
  writeString(header, 12, "fmt ");
  header.setUint32(16, 16, true); // sub-chunk size (16 for PCM)
  header.setUint16(20, 1, true); // audio format (1 = PCM)
  header.setUint16(22, numOfChannels, true);
  header.setUint32(24, sampleRate, true);
  header.setUint32(28, byteRate, true);
  header.setUint16(32, blockAlign, true);
  header.setUint16(34, bitsPerSample, true);

  // "data" sub-chunk: its size, followed by the samples themselves.
  writeString(header, 36, "data");
  header.setUint32(40, payloadSize, true);
  new Uint8Array(wavBuffer).set(new Uint8Array(buffer), headerSize);

  return wavBuffer;
}

/**
 * Write `string` into `view` starting at `offset`, one byte per UTF-16 code
 * unit (intended for short ASCII tags such as "RIFF").
 *
 * @throws Error if the string does not fit between `offset` and the end of
 * the view.
 */
function writeString(view: DataView, offset: number, string: string) {
  if (offset + string.length > view.byteLength) {
    throw new Error("String is too long for the available space in DataView");
  }
  Array.from({ length: string.length }, (_, i) => i).forEach((i) => {
    view.setUint8(offset + i, string.charCodeAt(i));
  });
}