feat: integrate voice playback

王晓东 3 months ago
parent
commit
4837c4b5bb

+ 1 - 1
src/components/component-list/components/card-channels/index.tsx

@@ -63,7 +63,7 @@ export default ({
     return <View className="px-16"><View className="component-card-empty mb-16">
       <View className="component-card-content">
         <View className="component-card-empty-figure"></View>
-        <View className="component-card-empty-tips">点击配置图片/视频</View>
+        <View className="component-card-empty-tips">配置视频号图片/视频</View>
       </View>
     </View>
     </View>

+ 2 - 1
src/components/component-list/index.tsx

@@ -71,12 +71,13 @@ export default ({ components, editMode = false }: Props) => {
       return
     }
    // WeChat Channels: skip the copy dialog and jump directly to the profile
-    if(c?.type === SocialMediaType.shiping.value && link){
+    if(c?.type === SocialMediaType.shiping.value){
       Taro.openChannelsUserProfile({
         finderUserName: link
       })
       return
     }
+    
     setCurrentMediaItem(mediaItem);
     setCurrentLink(link);
     setShow(true);

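Note: with the `&& link` guard removed, `finderUserName` can be `undefined` when a Channels card has no link configured. A minimal defensive sketch, assuming an unset link should silently no-op rather than surface a WeChat API error:

```ts
// Hypothetical guard; the commit itself relies on `link` always being set
// for WeChat Channels (视频号) components.
if (c?.type === SocialMediaType.shiping.value) {
  if (link) {
    Taro.openChannelsUserProfile({ finderUserName: link })
  }
  return
}
```
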
+ 33 - 34
src/pages/chat/components/input-bar/chatInput.ts

@@ -5,11 +5,14 @@ import { textChat } from "@/service/bot";
 import { useTextChat } from "@/store/textChat";
 import { TAgentDetail } from "@/types/agent";
 import { delay, getLoginId, isSuccess } from "@/utils";
-import { EAI_MODEL } from "@/consts/enum";
-import { useUnload } from "@tarojs/taro";
+import { useAudioPlayer } from "@/utils/audio";
+
+import Taro, { useUnload } from "@tarojs/taro";
 import { EChatRole, EContentType, TRobotMessage } from "@/types/bot";
 
-import { usePostMessage } from './message'
+import { usePostMessage, saveRobotContentToServer } from './message'
 
 import { getRecommendPrompt } from "@/service/bot"
 
@@ -35,7 +38,13 @@ export const useChatInput = ({ agent, setShowWelcome, setDisabled, }: Props) =>
     questions,
   } = useTextChat();
   const { startTimedMessage, stopTimedMessage, saveMessageToServer } = usePostMessage(getCurrentRobotMessage);
-  
+
+  const {
+    setFistChunk,
+    pushBase64ToQuene,
+    playChunk,
+  } = useAudioPlayer()
+
   let myMsgUk = '';
   let mySessionId = '';
 
@@ -82,12 +91,13 @@ export const useChatInput = ({ agent, setShowWelcome, setDisabled, }: Props) =>
     if(!isSuccess(myMsgResponse.status)){
       return setDisabled?.(false);
     }
-
+    let isFirstChunk = true
+    
    // Kick off the text chat stream
     stopReceiveChunk = textChat({
       params: {
         agentId: agent.agentId,
-        isEnableOutputAudioStream: false,
+        isEnableOutputAudioStream: true,
         isEnableSearch: false,
         isEnableThinking: false,
         loginId,
@@ -123,9 +133,10 @@ export const useChatInput = ({ agent, setShowWelcome, setDisabled, }: Props) =>
           agentId: agent.agentId ?? '',
           sessionId,
         }, 5000);
+        isFirstChunk = true
       },
       onReceived: (m) => {
-        console.log("received:", m);
+        // console.log("received:", m);
         if (m.reasoningContent) {
           updateRobotReasoningMessage(
             currentRobotMsgUk,
@@ -134,6 +145,15 @@ export const useChatInput = ({ agent, setShowWelcome, setDisabled, }: Props) =>
           );
         } else {
           updateRobotMessage(m.content, m.body);
+          // The first audio chunk carries the WAV header, so it is routed
+          // through setFistChunk; later chunks are plain audio data.
+          if (m.body?.content?.audio) {
+            if (isFirstChunk) {
+              isFirstChunk = false
+              setFistChunk(m.body.content.audio)
+            } else {
+              pushBase64ToQuene(m.body.content.audio)
+            }
+            playChunk();
+          }
         }
       },
       onFinished: async () => {
@@ -155,19 +175,9 @@ export const useChatInput = ({ agent, setShowWelcome, setDisabled, }: Props) =>
        // currentRobotMessage.content holds the full agent reply text received so far
         const content = currentRobotMessage.content as string
         updateRobotMessage(content, currentRobotMessage?.body, 2, true)
-        await saveMessageToServer({
-          loginId,
-          messages: [{
-            saveStatus: 2,
-            content: currentRobotMessage?.body?.content || content,
-            contentType: currentRobotMessage?.body?.contentType ?? EContentType.TextPlain,
-            isStreaming: false,
-            role: currentRobotMessage.role,
-            msgUk: currentRobotMessage.msgUk,
-          }],
-          agentId: agent.agentId,
-          sessionId,
-        })
+
+        await saveRobotContentToServer(currentRobotMessage, loginId, agent.agentId, sessionId)
+
         
         const response = await getRecommendPrompt({
           agentId: agent.agentId,
@@ -185,23 +195,12 @@ export const useChatInput = ({ agent, setShowWelcome, setDisabled, }: Props) =>
        // In case the server never sent a terminating message, force one more save when the request ends, so the timed saves don't drop the final part of the body
         const currentRobotMessage = getCurrentRobotMessage();
         if(currentRobotMessage && agent.agentId){
-          const content = currentRobotMessage.content as string
-          await saveMessageToServer({
-            loginId,
-            messages: [{
-              saveStatus: 2,
-              content: currentRobotMessage?.body?.content || content ,
-              contentType: currentRobotMessage?.body?.contentType ?? EContentType.TextPlain,
-              isStreaming: false,
-              role: currentRobotMessage.role,
-              msgUk: currentRobotMessage.msgUk,
-            }],
-            agentId: agent.agentId,
-            sessionId,
-          })
+          await saveRobotContentToServer(currentRobotMessage, loginId, agent.agentId, sessionId)
         }
         
         setDisabled?.(false);
+        
+        isFirstChunk = true
       },
       onError: () => {
         setDisabled?.(false);

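The playback wiring above hinges on the first audio chunk carrying the WAV header while later chunks are raw data. A condensed sketch of the pattern, assuming only the `useAudioPlayer` API imported above (`onAudioChunk` is a hypothetical extraction of the `onReceived` branch):

```ts
import { useAudioPlayer } from "@/utils/audio"

const { setFistChunk, pushBase64ToQuene, playChunk } = useAudioPlayer()
let isFirstChunk = true // reset in onStart and onFinished, as above

function onAudioChunk(base64Audio: string) {
  if (isFirstChunk) {
    isFirstChunk = false
    setFistChunk(base64Audio) // first chunk: split off and keep the 44-byte WAV header
  } else {
    pushBase64ToQuene(base64Audio) // later chunks: audio data only
  }
  playChunk() // no-op while a previous buffer is still playing
}
```
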
+ 40 - 4
src/pages/chat/components/input-bar/message.ts

@@ -17,6 +17,38 @@ export const saveMessageToServer = (data: TAppendMessages) => {
   return appendMessages(postData)
 }
 
+
+export const saveRobotContentToServer = async (currentRobotMessage: TRobotMessage, loginId: string, agentId: string, sessionId: string) => {
+  const currentContentType = currentRobotMessage?.body?.contentType
+  // Audio chunks were already reported while streaming, so the raw audio is
+  // never re-uploaded here; for aiseek/qa and plain-text replies the content
+  // is saved as-is. In every case only the accumulated text is persisted.
+  const content = (currentRobotMessage?.content as string) ?? ''
+
+  await saveMessageToServer({
+    loginId,
+    messages: [{
+      saveStatus: 2,
+      content,
+      contentType: currentContentType ?? EContentType.TextPlain,
+      isStreaming: false, // final save, matching the inlined calls this helper replaces
+      role: currentRobotMessage.role,
+      msgUk: currentRobotMessage.msgUk,
+    }],
+    agentId,
+    sessionId,
+  })
+}
+
 // Periodically report the agent's reply message body to the server
 
 export const usePostMessage = (getCurrentRobotMessage:() => TRobotMessage | undefined)=> {
@@ -54,12 +86,16 @@ export const usePostMessage = (getCurrentRobotMessage:() => TRobotMessage | unde
         if(!msg){
           return 
         }
+        const message = {
+          ...msg,
+          isStreaming: msg.isStreaming ?? false,
+          contentType: msg.contentType ?? EContentType.TextPlain,
+          saveStatus: msg.saveStatus ?? 1
+        }
+
         await saveMessageToServer({
           ...data,
-          messages: [{
-            ...msg,
-            saveStatus: msg.saveStatus ?? 1
-          }]
+          messages: [message]
         });
        console.log('Timed message saved successfully');
       } catch (error) {

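For reference, a sketch of how the new helper pairs with the timed saves; treat the parameter shapes as assumptions inferred from the calls in chatInput.ts:

```ts
const { startTimedMessage, stopTimedMessage } = usePostMessage(getCurrentRobotMessage)

// Partial saves every 5 s while the reply streams in...
startTimedMessage({ loginId, messages: [], agentId, sessionId }, 5000)

// ...then one final, authoritative save once the stream ends.
stopTimedMessage()
const msg = getCurrentRobotMessage()
if (msg) {
  await saveRobotContentToServer(msg, loginId, agentId, sessionId)
}
```
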
+ 1 - 1
src/pages/contact/components/contact-card/index.tsx

@@ -58,7 +58,7 @@ const Index = ({data, deleteable, className, refresh, fromContact}: Props)=> {
         <View className={`${style.nameRow} truncate`}>
           <View className={`${style.nickName} truncate`}>{data.name}</View>
         </View>
-        <View className='flex flex-col w-full gap-8 truncate'>
+        <View className='flex flex-col w-full gap-2 truncate'>
           {renderEntNameAndPosition()}
           <View className={`flex-1 ${style.lastMsg} truncate`}>{data.lastChatMsg}</View>
         </View>

+ 3 - 3
src/service/bot.ts

@@ -101,11 +101,11 @@ export const textChat = ({
     onFinished();
   });
   const onChunkReceived = (chunk: any) => {
-    console.log('chunkReceived: ', chunk);
+    // console.log('chunkReceived: ', chunk);
     const uint8Array = new Uint8Array(chunk.data);
-    console.log('uint8Array: ', uint8Array);
+    // console.log('uint8Array: ', uint8Array);
     var string = new TextDecoder("utf-8").decode(uint8Array);
-    console.log(string);
+    // console.log(string);
     jsonParser.parseChunk(string, (m) => {
       console.log('parseChunk', m);
       onReceived(m);

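One caveat in `onChunkReceived`: creating a fresh `TextDecoder` per call decodes each chunk in isolation, so a multi-byte UTF-8 character split across two network chunks would be mangled. A sketch of the standard streaming-decoder alternative (not something this commit changes):

```ts
// One persistent decoder; { stream: true } buffers an incomplete UTF-8
// sequence until the next chunk completes it.
const decoder = new TextDecoder("utf-8")

const onChunkReceived = (chunk: { data: ArrayBuffer }) => {
  const text = decoder.decode(new Uint8Array(chunk.data), { stream: true })
  jsonParser.parseChunk(text, (m) => {
    onReceived(m)
  })
}
```
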
+ 6 - 2
src/types/bot.ts

@@ -26,14 +26,18 @@ export type TContentType = EContentType[keyof EContentType];
 
 // Message body content for contentType === "aiseek/qa"
 export type TMessageBodyContent = {
-  "answer": {
+  answer: {
       "payload": {
           "links": string[],
           "pics": string[]
       },
       "text": string
   },
-  "qaId": string
+  audio?: string // base64 audio data, e.g. "FwXHApoAmf5y/Z/+f/6A+yr5FftA+yH43PWe8y7zXPKK8lT0j/"
+  format?: string // "pcm"
+  sampleRate?: number // 16000
+  sentenceBegin?: string // text of the sentence this audio chunk starts
+  qaId: string
 }
 export type TMessageBody = {
   content: TMessageBodyContent,

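An illustrative `aiseek/audio_chunk` payload against the extended type; the values are invented, and note the type still marks `answer` and `qaId` as required even for audio chunks, so a stub is needed to satisfy it:

```ts
const audioChunk: TMessageBodyContent = {
  answer: { payload: { links: [], pics: [] }, text: "" }, // stub; unused for audio
  qaId: "",
  audio: "FwXHApoAmf5y/Z/+f/6A+yr5FftA+yH43PWe8y7zXPKK8lT0j/", // base64 audio bytes
  format: "pcm",
  sampleRate: 16000,
  sentenceBegin: "你好,", // text of the sentence this chunk starts
}
```
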
+ 60 - 91
src/utils/audio.ts

@@ -1,38 +1,73 @@
 import Taro from "@tarojs/taro";
-import { ECharacterAISTATUS } from '@/types/index'
-let audioCtx:Taro.WebAudioContext | AudioContext;
-if (process.env.TARO_ENV === 'h5') {
-  audioCtx = new AudioContext()
-}else {
-  audioCtx = Taro.createWebAudioContext()
+import { decode } from '@/utils'
+
+
+function combineArrayBuffers(arrays: ArrayBuffer[], totalLength: number): ArrayBuffer {
+  const result = new Uint8Array(totalLength);
+  let offset = 0;
+
+  for (let i = 0; i < arrays.length; i++) {
+    const array = new Uint8Array(arrays[i]);
+    result.set(array, offset);
+    offset += array.length;
+  }
+
+  return result.buffer;
 }
+
+
+function combineHeaderAndChunk(header:ArrayBuffer, chunk:ArrayBuffer) {
+  // Create a new ArrayBuffer to hold both the header and the chunk
+  const combinedBuffer = new ArrayBuffer(header.byteLength + chunk.byteLength);
+
+  // Create a Uint8Array view of the combined buffer
+  const combinedView = new Uint8Array(combinedBuffer);
+
+  // Copy the header into the combined buffer
+  combinedView.set(new Uint8Array(header), 0);
+
+  // Copy the chunk data after the header
+  combinedView.set(new Uint8Array(chunk), header.byteLength);
+
+  return combinedBuffer;
+}
+
+
+
+let audioCtx = Taro.createWebAudioContext()
 let source: AudioBufferSourceNode;
 let enablePlay = true; // toggled to interrupt streaming playback
 let chunks: ArrayBuffer[] = []; // queued streaming-playback chunks
-let playStatusChangedCallback:(status: ECharacterAISTATUS)=>void;
 let requestTask = null;
-export const useAudioPlayer = () => {
-  const WAV_HEADER_LENGTH = 44
-  let totalLength = WAV_HEADER_LENGTH;
+let audioBase64 = ''
+const WAV_HEADER_LENGTH = 44
 
+export const useAudioPlayer = () => {
+  
+  let totalLength = 0
   let playing = false
   let wavHeader: ArrayBuffer;
-
-  const changeStatus = (status: ECharacterAISTATUS)=> {
-    playStatusChangedCallback && playStatusChangedCallback(status)
-  }
   
-  const setFistChunk = (chunk: ArrayBuffer, _requestTask?: any) => {
+  const setFistChunk = (base64Str: string, _requestTask?: any) => {
     if(_requestTask){
       requestTask = _requestTask  
     }
     enablePlay = true;
-    changeStatus(ECharacterAISTATUS.THINKING)
+    audioBase64 = base64Str;
+    let chunk = decode(base64Str)
     emptyQuene();
+    // The first chunk includes the 44-byte WAV header
     wavHeader = chunk.slice(0, WAV_HEADER_LENGTH);
     const firstChunkData = chunk.slice(WAV_HEADER_LENGTH);
     pushChunk2Quene(firstChunkData)
   }
+
+  const pushBase64ToQuene = (base64Str: string)=> {
+    audioBase64 += base64Str 
+    let buf = decode(base64Str);
+    pushChunk2Quene(buf);
+  }
+
   const pushChunk2Quene = (chunk: ArrayBuffer) => {
     chunks.push(chunk);
     totalLength += chunk.byteLength;
@@ -56,11 +91,11 @@ export const useAudioPlayer = () => {
       return;
     }
     playing = true;
-    changeStatus(ECharacterAISTATUS.RESPONDING)
-    let tmp = [...chunks]
 
-    const _chunk = combineArrayBuffers(tmp, totalLength)
-    const partChunks = combineHeaderAndChunk(wavHeader, _chunk)
+    let tmp = [...chunks];
+    const _chunk = combineArrayBuffers(tmp, totalLength);
+    const partChunks = combineHeaderAndChunk(wavHeader, _chunk);
+    
     emptyQuene();
     //@ts-ignore
     audioCtx.decodeAudioData(partChunks, (decodedBuffer: AudioBuffer) => {
@@ -72,8 +107,8 @@ export const useAudioPlayer = () => {
       source.onended = () => {
         console.log('play end')
         playing = false
-        changeStatus(ECharacterAISTATUS.IDLE)
         playChunk()
       };
       source.start(0);
     }, (err:any) => {
@@ -91,85 +126,19 @@ export const useAudioPlayer = () => {
       requestTask?.offChunkReceived?.()
     }
     emptyQuene();
-    source && source.stop();
+    
     enablePlay = false;
-    changeStatus(ECharacterAISTATUS.IDLE)
-    // console.log('stop play chunk')
-  }
 
-  const onPlayerStatusChanged = (callback: (status: number)=>void) => {
-    playStatusChangedCallback = callback
   }
 
+  
+
   return {
     pushChunk2Quene,
+    pushBase64ToQuene,
     playChunk,
     stopPlayChunk,
     setFistChunk,
-    onPlayerStatusChanged,
   }
 }
 
-function combineArrayBuffers(arrays: ArrayBuffer[], totalLength: number): ArrayBuffer {
-  const result = new Uint8Array(totalLength);
-  let offset = 0;
-
-  for (let i = 0; i < arrays.length; i++) {
-    const array = new Uint8Array(arrays[i]);
-    result.set(array, offset);
-    offset += array.length;
-  }
-
-  return result.buffer;
-}
-
-
-function combineHeaderAndChunk(header:ArrayBuffer, chunk:ArrayBuffer) {
-  // Create a new ArrayBuffer to hold both the header and the chunk
-  const combinedBuffer = new ArrayBuffer(header.byteLength + chunk.byteLength);
-
-  // Create a Uint8Array view of the combined buffer
-  const combinedView = new Uint8Array(combinedBuffer);
-
-  // Copy the header into the combined buffer
-  combinedView.set(new Uint8Array(header), 0);
-
-  // Copy the chunk data after the header
-  combinedView.set(new Uint8Array(chunk), header.byteLength);
-
-  return combinedBuffer;
-}
-
-// // Usage example
-// let storedHeader = null;
-// let isFirstChunk = true;
-
-// ws.onmessage = function(event) {
-//   if (isFirstChunk) {
-//       // Assume the first 44 bytes are the header
-//       storedHeader = event.data.slice(0, 44);
-//       // const headerInfo = parseWavHeader(storedHeader);
-//       // console.log("WAV Header Info:", headerInfo);
-
-//       // Handle the rest of the first chunk as audio data
-//       const firstChunkData = event.data.slice(44);
-//       const combinedData = combineHeaderAndChunk(storedHeader, firstChunkData);
-//       processAudioData(combinedData);
-
-//       isFirstChunk = false;
-//   } else {
-//       // For subsequent chunks, combine with the stored header
-//       const combinedData = combineHeaderAndChunk(storedHeader, event.data);
-//       processAudioData(combinedData);
-//   }
-// };
-
-// function processAudioData(audioData) {
-//   // Here you would typically send the data to the Web Audio API
-//   // For example:
-//   // audioContext.decodeAudioData(audioData)
-//   //     .then(decodedData => {
-//   //         // Use the decoded audio data
-//   //     })
-//   //     .catch(error => console.error("Error decoding audio data:", error));
-// }

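Two things worth flagging in this file: the h5 `AudioContext` branch is gone, so the module now assumes the mini-program runtime; and `stopPlayChunk` no longer calls `source.stop()`, so an in-flight buffer plays out even after an interrupt. Also, the `decode` helper imported from `@/utils` is not part of this commit; a plausible sketch, assuming it maps a base64 string to an `ArrayBuffer`:

```ts
import Taro from "@tarojs/taro"

// Hypothetical implementation of the decode helper assumed by audio.ts.
export function decode(base64: string): ArrayBuffer {
  if (process.env.TARO_ENV === "h5") {
    const binary = atob(base64)
    const bytes = new Uint8Array(binary.length)
    for (let i = 0; i < binary.length; i++) {
      bytes[i] = binary.charCodeAt(i)
    }
    return bytes.buffer
  }
  // Mini-program runtime: use the built-in base64 conversion.
  return Taro.base64ToArrayBuffer(base64)
}
```
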
+ 10 - 1
src/utils/jsonChunkParser.ts

@@ -57,6 +57,7 @@ export default class JsonChunkParser {
    // Split the buffer on newlines
     const lines = this.buffer.split("\n");
     let receivedJsonBody = {}
+    let audio = ''
    let combinedContent: string[] = []; // accumulates content fields
    let combinedReasoningContent: string[] = []; // accumulates reasoning fields
     
@@ -82,6 +83,7 @@ export default class JsonChunkParser {
            const jsonStr = line.substring(5); // strip the "data:" prefix
            const json: TJsonMessage = JSON.parse(jsonStr);
            receivedJsonBody = json
            // Plain-text reply
             if(json.contentType === "text/plain") {
               if (json.content) {
@@ -96,6 +98,13 @@ export default class JsonChunkParser {
               if (json.content.answer?.text) {
                combinedContent.push(json.content.answer.text); // collect the QA answer text
               }
+            } else if (json.contentType === "aiseek/audio_chunk") {
+              if (json.content.sentenceBegin) {
+                combinedContent.push(json.content.sentenceBegin); // collect the sentenceBegin text
+              }
+              if (json.content.audio) {
+                audio += json.content.audio; // accumulate this pass's base64 audio
+              }
             }
             
           }
@@ -109,7 +118,7 @@ export default class JsonChunkParser {
     }
 
    // Once the current chunk is fully parsed and content has accumulated, call onParsed
-    if (combinedContent.length > 0) {
+    if (combinedContent.length > 0 || audio.length > 0) {
      onParsed({ content: combinedContent.join(""),  body: receivedJsonBody  }); // merge and emit
     }
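
A usage sketch of the parser end to end, assuming a no-argument constructor; the SSE frame is illustrative, and real frames come from `textChat` in src/service/bot.ts:

```ts
const parser = new JsonChunkParser()

const frame =
  'data:{"contentType":"aiseek/audio_chunk","content":{"sentenceBegin":"你好,","audio":"FwXHApoA"}}\n'

parser.parseChunk(frame, (m) => {
  // m.content carries the accumulated sentenceBegin text; m.body is the last
  // parsed JSON, whose content.audio feeds setFistChunk / pushBase64ToQuene.
  console.log(m.content, m.body)
})
```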