Compare commits

...

10 Commits

Author SHA1 Message Date
Dogtiti
48a1e8a584 chore: i18n 2024-11-07 21:32:47 +08:00
Dogtiti
e44ebe3f0e feat: realtime config 2024-11-07 21:28:23 +08:00
lloydzhou
283caba8ce stop streaming play after get input audio. 2024-11-07 18:57:57 +08:00
lloydzhou
b78e5db817 add temperature config 2024-11-07 17:55:51 +08:00
lloydzhou
46c469b2d7 add voice config 2024-11-07 17:47:55 +08:00
lloydzhou
c00ebbea4f update 2024-11-07 17:40:03 +08:00
lloydzhou
c526ff80b5 update 2024-11-07 17:23:20 +08:00
lloydzhou
0037b0c944 ts error 2024-11-07 17:03:04 +08:00
lloydzhou
6f81bb3b8a add context after connected 2024-11-07 16:56:15 +08:00
lloydzhou
7bdc45ed3e connect realtime model when open panel 2024-11-07 16:41:24 +08:00
7 changed files with 360 additions and 255 deletions

View File

@@ -793,11 +793,13 @@ export function ChatActions(props: {
)}
</>
<div className={styles["chat-input-actions-end"]}>
<ChatAction
onClick={() => props.setShowChatSidePanel(true)}
text={"Realtime Chat"}
icon={<HeadphoneIcon />}
/>
{config.realtimeConfig.enable && (
<ChatAction
onClick={() => props.setShowChatSidePanel(true)}
text={"Realtime Chat"}
icon={<HeadphoneIcon />}
/>
)}
</div>
</div>
);
@@ -2035,14 +2037,16 @@ function _Chat() {
[styles["chat-side-panel-show"]]: showChatSidePanel,
})}
>
<RealtimeChat
onClose={() => {
setShowChatSidePanel(false);
}}
onStartVoice={async () => {
console.log("start voice");
}}
/>
{showChatSidePanel && (
<RealtimeChat
onClose={() => {
setShowChatSidePanel(false);
}}
onStartVoice={async () => {
console.log("start voice");
}}
/>
)}
</div>
</div>
</div>

View File

@@ -1,6 +1,5 @@
import VoiceIcon from "@/app/icons/voice.svg";
import VoiceOffIcon from "@/app/icons/voice-off.svg";
import Close24Icon from "@/app/icons/close-24.svg";
import PowerIcon from "@/app/icons/power.svg";
import styles from "./realtime-chat.module.scss";
@@ -8,12 +7,7 @@ import clsx from "clsx";
import { useState, useRef, useEffect } from "react";
import {
useAccessStore,
useChatStore,
ChatMessage,
createMessage,
} from "@/app/store";
import { useChatStore, createMessage, useAppConfig } from "@/app/store";
import { IconButton } from "@/app/components/button";
@@ -38,55 +32,80 @@ export function RealtimeChat({
onStartVoice,
onPausedVoice,
}: RealtimeChatProps) {
const currentItemId = useRef<string>("");
const currentBotMessage = useRef<ChatMessage | null>();
const currentUserMessage = useRef<ChatMessage | null>();
const accessStore = useAccessStore.getState();
const chatStore = useChatStore();
const session = chatStore.currentSession();
const config = useAppConfig();
const [status, setStatus] = useState("");
const [isRecording, setIsRecording] = useState(false);
const [isConnected, setIsConnected] = useState(false);
const [isConnecting, setIsConnecting] = useState(false);
const [modality, setModality] = useState("audio");
const [isAzure, setIsAzure] = useState(false);
const [endpoint, setEndpoint] = useState("");
const [deployment, setDeployment] = useState("");
const [useVAD, setUseVAD] = useState(true);
const clientRef = useRef<RTClient | null>(null);
const audioHandlerRef = useRef<AudioHandler | null>(null);
const initRef = useRef(false);
const apiKey = accessStore.openaiApiKey;
const temperature = config.realtimeConfig.temperature;
const apiKey = config.realtimeConfig.apiKey;
const model = config.realtimeConfig.model;
const azure = config.realtimeConfig.provider === "Azure";
const azureEndpoint = config.realtimeConfig.azure.endpoint;
const azureDeployment = config.realtimeConfig.azure.deployment;
const voice = config.realtimeConfig.voice;
const handleConnect = async () => {
if (isConnecting) return;
if (!isConnected) {
try {
setIsConnecting(true);
clientRef.current = isAzure
? new RTClient(new URL(endpoint), { key: apiKey }, { deployment })
: new RTClient(
clientRef.current = azure
? new RTClient(
new URL(azureEndpoint),
{ key: apiKey },
{ model: "gpt-4o-realtime-preview-2024-10-01" },
);
{ deployment: azureDeployment },
)
: new RTClient({ key: apiKey }, { model });
const modalities: Modality[] =
modality === "audio" ? ["text", "audio"] : ["text"];
const turnDetection: TurnDetection = useVAD
? { type: "server_vad" }
: null;
clientRef.current.configure({
instructions: "Hi",
instructions: "",
voice,
input_audio_transcription: { model: "whisper-1" },
turn_detection: turnDetection,
tools: [],
temperature: 0.9,
temperature,
modalities,
});
startResponseListener();
setIsConnected(true);
try {
const recentMessages = chatStore.getMessagesWithMemory();
for (const message of recentMessages) {
const { role, content } = message;
if (typeof content === "string") {
await clientRef.current.sendItem({
type: "message",
role: role as any,
content: [
{
type: (role === "assistant" ? "text" : "input_text") as any,
text: content as string,
},
],
});
}
}
} catch (error) {
console.error("Set message failed:", error);
}
} catch (error) {
console.error("Connection failed:", error);
setStatus("Connection failed");
} finally {
setIsConnecting(false);
}
@@ -164,7 +183,6 @@ export function RealtimeChat({
const blob = audioHandlerRef.current?.savePlayFile();
uploadImage(blob!).then((audio_url) => {
botMessage.audio_url = audio_url;
// botMessage.date = new Date().toLocaleString();
// update text and audio_url
chatStore.updateTargetSession(session, (session) => {
session.messages = session.messages.concat();
@@ -175,7 +193,6 @@ export function RealtimeChat({
};
const handleInputAudio = async (item: RTInputAudioItem) => {
audioHandlerRef.current?.stopStreamingPlayback();
await item.waitForCompletion();
if (item.transcription) {
const userMessage = createMessage({
@@ -199,6 +216,8 @@ export function RealtimeChat({
});
});
}
// stop streaming play after get input audio.
audioHandlerRef.current?.stopStreamingPlayback();
};
const toggleRecording = async () => {
@@ -231,222 +250,53 @@ export function RealtimeChat({
};
useEffect(() => {
// 防止重复初始化
if (initRef.current) return;
initRef.current = true;
const initAudioHandler = async () => {
const handler = new AudioHandler();
await handler.initialize();
audioHandlerRef.current = handler;
await handleConnect();
await toggleRecording();
};
initAudioHandler().catch(console.error);
initAudioHandler().catch((error) => {
setStatus(error);
console.error(error);
});
return () => {
disconnect();
if (isRecording) {
toggleRecording();
}
audioHandlerRef.current?.close().catch(console.error);
disconnect();
};
}, []);
// useEffect(() => {
// if (
// clientRef.current?.getTurnDetectionType() === "server_vad" &&
// audioData
// ) {
// // console.log("appendInputAudio", audioData);
// // 将录制的16PCM音频发送给openai
// clientRef.current?.appendInputAudio(audioData);
// }
// }, [audioData]);
// update session params
useEffect(() => {
clientRef.current?.configure({ voice });
}, [voice]);
useEffect(() => {
clientRef.current?.configure({ temperature });
}, [temperature]);
// useEffect(() => {
// console.log("isRecording", isRecording);
// if (!isRecording.current) return;
// if (!clientRef.current) {
// const apiKey = accessStore.openaiApiKey;
// const client = (clientRef.current = new RealtimeClient({
// url: "wss://api.openai.com/v1/realtime",
// apiKey,
// dangerouslyAllowAPIKeyInBrowser: true,
// debug: true,
// }));
// client
// .connect()
// .then(() => {
// // TODO 设置真实的上下文
// client.sendUserMessageContent([
// {
// type: `input_text`,
// text: `Hi`,
// // text: `For testing purposes, I want you to list ten car brands. Number each item, e.g. "one (or whatever number you are one): the item name".`
// },
// ]);
// // 配置服务端判断说话人开启还是结束
// client.updateSession({
// turn_detection: { type: "server_vad" },
// });
// client.on("realtime.event", (realtimeEvent) => {
// // 调试
// console.log("realtime.event", realtimeEvent);
// });
// client.on("conversation.interrupted", async () => {
// if (currentBotMessage.current) {
// stopPlaying();
// try {
// client.cancelResponse(
// currentBotMessage.current?.id,
// currentTime(),
// );
// } catch (e) {
// console.error(e);
// }
// }
// });
// client.on("conversation.updated", async (event: any) => {
// // console.log("currentSession", chatStore.currentSession());
// // const items = client.conversation.getItems();
// const content = event?.item?.content?.[0]?.transcript || "";
// const text = event?.item?.content?.[0]?.text || "";
// // console.log(
// // "conversation.updated",
// // event,
// // "content[0]",
// // event?.item?.content?.[0]?.transcript,
// // "formatted",
// // event?.item?.formatted?.transcript,
// // "content",
// // content,
// // "text",
// // text,
// // event?.item?.status,
// // event?.item?.role,
// // items.length,
// // items,
// // );
// const { item, delta } = event;
// const { role, id, status, formatted } = item || {};
// if (id && role == "assistant") {
// if (
// !currentBotMessage.current ||
// currentBotMessage.current?.id != id
// ) {
// // create assistant message and save to session
// currentBotMessage.current = createMessage({ id, role });
// chatStore.updateCurrentSession((session) => {
// session.messages = session.messages.concat([
// currentBotMessage.current!,
// ]);
// });
// }
// if (currentBotMessage.current?.id != id) {
// stopPlaying();
// }
// if (content) {
// currentBotMessage.current.content = content;
// chatStore.updateCurrentSession((session) => {
// session.messages = session.messages.concat();
// });
// }
// if (delta?.audio) {
// // typeof delta.audio is Int16Array
// // 直接播放
// addInt16PCM(delta.audio);
// }
// // console.log(
// // "updated try save wavFile",
// // status,
// // currentBotMessage.current?.audio_url,
// // formatted?.audio,
// // );
// if (
// status == "completed" &&
// !currentBotMessage.current?.audio_url &&
// formatted?.audio?.length
// ) {
// // 转换为wav文件保存 TODO 使用mp3格式会更节省空间
// const botMessage = currentBotMessage.current;
// const wavFile = new WavPacker().pack(sampleRate, {
// bitsPerSample: 16,
// channelCount: 1,
// data: formatted?.audio,
// });
// // 这里将音频文件放到对象里面wavFile.url可以使用<audio>标签播放
// item.formatted.file = wavFile;
// uploadImageRemote(wavFile.blob).then((audio_url) => {
// botMessage.audio_url = audio_url;
// chatStore.updateCurrentSession((session) => {
// session.messages = session.messages.concat();
// });
// });
// }
// if (
// status == "completed" &&
// !currentBotMessage.current?.content
// ) {
// chatStore.updateCurrentSession((session) => {
// session.messages = session.messages.filter(
// (m) => m.id !== currentBotMessage.current?.id,
// );
// });
// }
// }
// if (id && role == "user" && !text) {
// if (
// !currentUserMessage.current ||
// currentUserMessage.current?.id != id
// ) {
// // create assistant message and save to session
// currentUserMessage.current = createMessage({ id, role });
// chatStore.updateCurrentSession((session) => {
// session.messages = session.messages.concat([
// currentUserMessage.current!,
// ]);
// });
// }
// if (content) {
// // 转换为wav文件保存 TODO 使用mp3格式会更节省空间
// const userMessage = currentUserMessage.current;
// const wavFile = new WavPacker().pack(sampleRate, {
// bitsPerSample: 16,
// channelCount: 1,
// data: formatted?.audio,
// });
// // 这里将音频文件放到对象里面wavFile.url可以使用<audio>标签播放
// item.formatted.file = wavFile;
// uploadImageRemote(wavFile.blob).then((audio_url) => {
// // update message content
// userMessage.content = content;
// // update message audio_url
// userMessage.audio_url = audio_url;
// chatStore.updateCurrentSession((session) => {
// session.messages = session.messages.concat();
// });
// });
// }
// }
// });
// })
// .catch((e) => {
// console.error("Error", e);
// });
// }
// return () => {
// stop();
// // TODO close client
// clientRef.current?.disconnect();
// };
// }, [isRecording.current]);
const handleClose = () => {
const handleClose = async () => {
onClose?.();
disconnect();
if (isRecording) {
await toggleRecording();
}
disconnect().catch(console.error);
};
return (
<div className={styles["realtime-chat"]}>
<div
className={clsx(styles["circle-mic"], {
[styles["pulse"]]: true,
[styles["pulse"]]: isRecording,
})}
>
<div className={styles["icon-center"]}></div>
@@ -454,35 +304,20 @@ export function RealtimeChat({
<div className={styles["bottom-icons"]}>
<div>
<IconButton
icon={isRecording ? <VoiceOffIcon /> : <VoiceIcon />}
icon={isRecording ? <VoiceIcon /> : <VoiceOffIcon />}
onClick={toggleRecording}
disabled={!isConnected}
bordered
shadow
bordered
/>
</div>
<div className={styles["icon-center"]}>
<div className={styles["icon-center"]}>{status}</div>
<div>
<IconButton
icon={<PowerIcon />}
text={
isConnecting
? "Connecting..."
: isConnected
? "Disconnect"
: "Connect"
}
onClick={handleConnect}
disabled={isConnecting}
bordered
shadow
/>
</div>
<div onClick={handleClose}>
<IconButton
icon={<Close24Icon />}
onClick={handleClose}
bordered
shadow
bordered
/>
</div>
</div>

View File

@@ -0,0 +1,173 @@
import { RealtimeConfig } from "@/app/store";
import Locale from "@/app/locales";
import { ListItem, Select, PasswordInput } from "@/app/components/ui-lib";
import { InputRange } from "@/app/components/input-range";
import { Voice } from "rt-client";
import { ServiceProvider } from "@/app/constant";
const providers = [ServiceProvider.OpenAI, ServiceProvider.Azure];
const models = ["gpt-4o-realtime-preview-2024-10-01"];
const voice = ["alloy", "shimmer", "echo"];
/**
 * Settings panel for the realtime (voice) chat feature.
 *
 * Renders an enable toggle; when enabled, exposes provider, model, API key,
 * provider-specific (Azure) endpoint/deployment fields, voice selection, and
 * a temperature slider. All changes are funneled through `props.updateConfig`,
 * which mutates a draft `RealtimeConfig` supplied by the caller.
 *
 * @param props.realtimeConfig current realtime configuration values
 * @param props.updateConfig   callback that applies a mutation to the config
 */
export function RealtimeConfigList(props: {
  realtimeConfig: RealtimeConfig;
  updateConfig: (updater: (config: RealtimeConfig) => void) => void;
}) {
  // Azure needs an explicit endpoint + deployment name; only show these
  // fields when the Azure provider is selected.
  const azureConfigComponent = props.realtimeConfig.provider ===
    ServiceProvider.Azure && (
    <>
      <ListItem
        title={Locale.Settings.Realtime.Azure.Endpoint.Title}
        subTitle={Locale.Settings.Realtime.Azure.Endpoint.SubTitle}
      >
        <input
          value={props.realtimeConfig?.azure?.endpoint}
          type="text"
          placeholder={Locale.Settings.Realtime.Azure.Endpoint.Title}
          onChange={(e) => {
            props.updateConfig(
              (config) => (config.azure.endpoint = e.currentTarget.value),
            );
          }}
        />
      </ListItem>
      <ListItem
        title={Locale.Settings.Realtime.Azure.Deployment.Title}
        subTitle={Locale.Settings.Realtime.Azure.Deployment.SubTitle}
      >
        <input
          value={props.realtimeConfig?.azure?.deployment}
          type="text"
          placeholder={Locale.Settings.Realtime.Azure.Deployment.Title}
          onChange={(e) => {
            props.updateConfig(
              (config) => (config.azure.deployment = e.currentTarget.value),
            );
          }}
        />
      </ListItem>
    </>
  );
  return (
    <>
      <ListItem
        title={Locale.Settings.Realtime.Enable.Title}
        subTitle={Locale.Settings.Realtime.Enable.SubTitle}
      >
        <input
          type="checkbox"
          checked={props.realtimeConfig.enable}
          onChange={(e) =>
            props.updateConfig(
              (config) => (config.enable = e.currentTarget.checked),
            )
          }
        ></input>
      </ListItem>
      {props.realtimeConfig.enable && (
        <>
          <ListItem
            title={Locale.Settings.Realtime.Provider.Title}
            subTitle={Locale.Settings.Realtime.Provider.SubTitle}
          >
            <Select
              aria-label={Locale.Settings.Realtime.Provider.Title}
              value={props.realtimeConfig.provider}
              onChange={(e) => {
                props.updateConfig(
                  (config) =>
                    (config.provider = e.target.value as ServiceProvider),
                );
              }}
            >
              {providers.map((v, i) => (
                <option value={v} key={i}>
                  {v}
                </option>
              ))}
            </Select>
          </ListItem>
          <ListItem
            title={Locale.Settings.Realtime.Model.Title}
            subTitle={Locale.Settings.Realtime.Model.SubTitle}
          >
            <Select
              aria-label={Locale.Settings.Realtime.Model.Title}
              value={props.realtimeConfig.model}
              onChange={(e) => {
                props.updateConfig((config) => (config.model = e.target.value));
              }}
            >
              {models.map((v, i) => (
                <option value={v} key={i}>
                  {v}
                </option>
              ))}
            </Select>
          </ListItem>
          <ListItem
            title={Locale.Settings.Realtime.ApiKey.Title}
            subTitle={Locale.Settings.Realtime.ApiKey.SubTitle}
          >
            <PasswordInput
              aria={Locale.Settings.ShowPassword}
              aria-label={Locale.Settings.Realtime.ApiKey.Title}
              value={props.realtimeConfig.apiKey}
              type="text"
              placeholder={Locale.Settings.Realtime.ApiKey.Placeholder}
              onChange={(e) => {
                props.updateConfig(
                  (config) => (config.apiKey = e.currentTarget.value),
                );
              }}
            />
          </ListItem>
          {azureConfigComponent}
          {/* NOTE(review): reuses the TTS.Voice locale strings — the Realtime
              locale section defines no Voice entry of its own. */}
          <ListItem
            title={Locale.Settings.TTS.Voice.Title}
            subTitle={Locale.Settings.TTS.Voice.SubTitle}
          >
            <Select
              aria-label={Locale.Settings.TTS.Voice.Title}
              value={props.realtimeConfig.voice}
              onChange={(e) => {
                props.updateConfig(
                  (config) => (config.voice = e.currentTarget.value as Voice),
                );
              }}
            >
              {voice.map((v, i) => (
                <option value={v} key={i}>
                  {v}
                </option>
              ))}
            </Select>
          </ListItem>
          <ListItem
            title={Locale.Settings.Realtime.Temperature.Title}
            subTitle={Locale.Settings.Realtime.Temperature.SubTitle}
          >
            <InputRange
              // Fixed: previously pointed at the generic chat
              // Settings.Temperature label instead of the Realtime section's.
              aria={Locale.Settings.Realtime.Temperature.Title}
              value={props.realtimeConfig?.temperature?.toFixed(1)}
              min="0.6"
              max="1"
              step="0.1"
              onChange={(e) => {
                props.updateConfig(
                  (config) =>
                    (config.temperature = e.currentTarget.valueAsNumber),
                );
              }}
            ></InputRange>
          </ListItem>
        </>
      )}
    </>
  );
}

View File

@@ -85,6 +85,7 @@ import { nanoid } from "nanoid";
import { useMaskStore } from "../store/mask";
import { ProviderType } from "../utils/cloud";
import { TTSConfigList } from "./tts-config";
import { RealtimeConfigList } from "./realtime-chat/realtime-config";
function EditPromptModal(props: { id: string; onClose: () => void }) {
const promptStore = usePromptStore();
@@ -1799,7 +1800,18 @@ export function Settings() {
{shouldShowPromptModal && (
<UserPromptModal onClose={() => setShowPromptModal(false)} />
)}
<List>
<RealtimeConfigList
realtimeConfig={config.realtimeConfig}
updateConfig={(updater) => {
const realtimeConfig = { ...config.realtimeConfig };
updater(realtimeConfig);
config.update(
(config) => (config.realtimeConfig = realtimeConfig),
);
}}
/>
</List>
<List>
<TTSConfigList
ttsConfig={config.ttsConfig}

View File

@@ -562,6 +562,39 @@ const cn = {
SubTitle: "生成语音的速度",
},
},
Realtime: {
Enable: {
Title: "实时聊天",
SubTitle: "开启实时聊天功能",
},
Provider: {
Title: "模型服务商",
SubTitle: "切换不同的服务商",
},
Model: {
Title: "模型",
SubTitle: "选择一个模型",
},
ApiKey: {
Title: "API Key",
SubTitle: "API Key",
Placeholder: "API Key",
},
Azure: {
Endpoint: {
Title: "接口地址",
SubTitle: "接口地址",
},
Deployment: {
Title: "部署名称",
SubTitle: "部署名称",
},
},
Temperature: {
Title: "随机性 (temperature)",
SubTitle: "值越大,回复越随机",
},
},
},
Store: {
DefaultTopic: "新的聊天",

View File

@@ -570,6 +570,39 @@ const en: LocaleType = {
},
Engine: "TTS Engine",
},
Realtime: {
Enable: {
Title: "Realtime Chat",
SubTitle: "Enable realtime chat feature",
},
Provider: {
Title: "Model Provider",
SubTitle: "Switch between different providers",
},
Model: {
Title: "Model",
SubTitle: "Select a model",
},
ApiKey: {
Title: "API Key",
SubTitle: "API Key",
Placeholder: "API Key",
},
Azure: {
Endpoint: {
Title: "Endpoint",
SubTitle: "Endpoint",
},
Deployment: {
Title: "Deployment Name",
SubTitle: "Deployment Name",
},
},
Temperature: {
Title: "Randomness (temperature)",
SubTitle: "Higher values result in more random responses",
},
},
},
Store: {
DefaultTopic: "New Conversation",

View File

@@ -15,6 +15,7 @@ import {
ServiceProvider,
} from "../constant";
import { createPersistStore } from "../utils/store";
import type { Voice } from "rt-client";
export type ModelType = (typeof DEFAULT_MODELS)[number]["name"];
export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number];
@@ -90,12 +91,26 @@ export const DEFAULT_CONFIG = {
voice: DEFAULT_TTS_VOICE,
speed: 1.0,
},
realtimeConfig: {
enable: false,
provider: "OpenAI" as ServiceProvider,
model: "gpt-4o-realtime-preview-2024-10-01",
apiKey: "",
azure: {
endpoint: "",
deployment: "",
},
temperature: 0.9,
voice: "alloy" as Voice,
},
};
export type ChatConfig = typeof DEFAULT_CONFIG;
export type ModelConfig = ChatConfig["modelConfig"];
export type TTSConfig = ChatConfig["ttsConfig"];
export type RealtimeConfig = ChatConfig["realtimeConfig"];
export function limitNumber(
x: number,