1 C++为内存中的pcm音频数据增加wav文件头

最近面临这样的一个任务,在C++层使用tts进行语音合成之后需要将合成音频的base64字符串实时传递到web端,使用js在web端进行播放,而js在web端需要wav音频流,直接传递音频裸流到js中会播放出错。

所以需要先将C++生成的音频裸流加上wav文件头,然后将内存中的音频字节数据进行base64编码之后再传递到web中。

针对这个功能,我封装了一个C++头文件来实现,代码如下:

#ifndef ADD_WAV_HEADER_H
#define ADD_WAV_HEADER_H

#include <cstddef>
#include <cstdint>
#include <vector>

#pragma pack(push, 1) // 确保结构体按1字节对齐

// Leading 12 bytes of a WAV file: the RIFF container descriptor.
// Declared inside the surrounding #pragma pack(push, 1) region, so the fields
// are laid out back to back with no padding.
// NOTE(review): the byte layout assumes a 4-byte int — confirm for the target platform.
struct RIFFHeader {
    char riff[4]; // Chunk tag, the four characters "RIFF" (no terminating NUL)
    int fileSize; // Size of the file, not counting the first 8 bytes (the tag and this field)
    char wave[4]; // Form type, the four characters "WAVE" (no terminating NUL)
};

// The "fmt " sub-chunk describing how the audio samples are encoded.
// Declared inside the surrounding #pragma pack(push, 1) region, so the fields
// are laid out back to back with no padding.
// NOTE(review): the byte layout assumes a 4-byte int and a 2-byte short — confirm for the target platform.
struct FormatChunk {
    char fmt[4]; // Chunk tag, the four characters "fmt " (trailing space, no NUL)
    int chunkSize; // Size of the sub-chunk body, usually 16 or 18
    short audioFormat; // Audio format tag; 1 means PCM
    short numChannels; // Number of channels
    int sampleRate; // Sample rate
    int byteRate; // Bytes per second
    short blockAlign; // Block alignment
    short bitsPerSample; // Bits per sample
};

// Header of the "data" sub-chunk: a tag followed by the data size.
// Declared inside the surrounding #pragma pack(push, 1) region, so the fields
// are laid out back to back with no padding.
// NOTE(review): the byte layout assumes a 4-byte int — confirm for the target platform.
struct DataChunk {
    char data[4]; // Chunk tag, the four characters "data" (no terminating NUL)
    int dataSize; // Size of the data
};

#pragma pack(pop)

// Prepends a 44-byte PCM WAV header to raw audio bytes held in memory.
//
// The buffer is modified in place. Afterwards it contains, in order:
//   "RIFF" <riff size> "WAVE"                       (12 bytes)
//   "fmt " <16> <format=1 (PCM)> <channels> <sampleRate>
//          <byteRate> <blockAlign> <bitsPerSample>  (24 bytes)
//   "data" <data size>                              ( 8 bytes)
// followed by the original contents of audioData.
//
// Parameters:
//   audioData     - raw PCM bytes; the header is inserted at the front.
//   sampleRate    - samples per second, written as a 32-bit value.
//   channels      - channel count; only the low 16 bits are stored.
//   bitsPerSample - bits per sample; only the low 16 bits are stored.
//
// All multi-byte fields are written explicitly in little-endian order, so the
// output does not depend on the byte order or integer widths of the host.
// Size fields are 32 bits wide: a payload larger than that wraps around in the
// header, and no range check is performed on any argument.
inline void AddWAVHeader(std::vector<char>& audioData, int sampleRate, int channels, int bitsPerSample) {
    constexpr std::size_t kHeaderSize = 44;     // 12 (RIFF) + 24 (fmt ) + 8 (data)
    constexpr std::uint32_t kFmtBodySize = 16;  // fmt chunk body for plain PCM
    constexpr std::uint32_t kFormatPcm = 1;

    const std::uint32_t dataSize = static_cast<std::uint32_t>(audioData.size());
    // The RIFF size covers everything after the 8-byte "RIFF" + size prefix,
    // which includes the fmt chunk AND the 8-byte "data" chunk header.
    const std::uint32_t riffSize = dataSize + static_cast<std::uint32_t>(kHeaderSize - 8);
    const int byteRate = sampleRate * channels * bitsPerSample / 8;
    const int blockAlign = channels * bitsPerSample / 8;

    char header[kHeaderSize];
    std::size_t pos = 0;

    // Copies a four-character chunk tag (without the terminating NUL).
    const auto putTag = [&](const char* tag) {
        for (int i = 0; i < 4; ++i) {
            header[pos++] = tag[i];
        }
    };
    // Writes the low `count` bytes of `value`, least significant byte first.
    const auto putBytes = [&](std::uint32_t value, int count) {
        for (int i = 0; i < count; ++i) {
            header[pos++] = static_cast<char>((value >> (8 * i)) & 0xFFu);
        }
    };

    putTag("RIFF");
    putBytes(riffSize, 4);
    putTag("WAVE");

    putTag("fmt ");
    putBytes(kFmtBodySize, 4);
    putBytes(kFormatPcm, 2);
    putBytes(static_cast<std::uint32_t>(channels), 2);
    putBytes(static_cast<std::uint32_t>(sampleRate), 4);
    putBytes(static_cast<std::uint32_t>(byteRate), 4);
    putBytes(static_cast<std::uint32_t>(blockAlign), 2);
    putBytes(static_cast<std::uint32_t>(bitsPerSample), 2);

    putTag("data");
    putBytes(dataSize, 4);

    // One insertion: the payload is shifted a single time instead of once per chunk.
    audioData.insert(audioData.begin(), header, header + kHeaderSize);
}

// Prepends a 44-byte PCM WAV header to raw audio bytes held in memory
// (overload for byte buffers stored as std::vector<uint8_t>).
//
// The buffer is modified in place. Afterwards it contains, in order:
//   "RIFF" <riff size> "WAVE"                       (12 bytes)
//   "fmt " <16> <format=1 (PCM)> <channels> <sampleRate>
//          <byteRate> <blockAlign> <bitsPerSample>  (24 bytes)
//   "data" <data size>                              ( 8 bytes)
// followed by the original contents of audioData.
//
// Parameters:
//   audioData     - raw PCM bytes; the header is inserted at the front.
//   sampleRate    - samples per second, written as a 32-bit value.
//   channels      - channel count; only the low 16 bits are stored.
//   bitsPerSample - bits per sample; only the low 16 bits are stored.
//
// All multi-byte fields are written explicitly in little-endian order, so the
// output does not depend on the byte order or integer widths of the host.
// Size fields are 32 bits wide: a payload larger than that wraps around in the
// header, and no range check is performed on any argument.
inline void AddWAVHeader(std::vector<uint8_t>& audioData, int sampleRate, int channels, int bitsPerSample) {
    constexpr std::size_t kHeaderSize = 44;     // 12 (RIFF) + 24 (fmt ) + 8 (data)
    constexpr std::uint32_t kFmtBodySize = 16;  // fmt chunk body for plain PCM
    constexpr std::uint32_t kFormatPcm = 1;

    const std::uint32_t dataSize = static_cast<std::uint32_t>(audioData.size());
    // The RIFF size covers everything after the 8-byte "RIFF" + size prefix,
    // which includes the fmt chunk AND the 8-byte "data" chunk header.
    const std::uint32_t riffSize = dataSize + static_cast<std::uint32_t>(kHeaderSize - 8);
    const int byteRate = sampleRate * channels * bitsPerSample / 8;
    const int blockAlign = channels * bitsPerSample / 8;

    std::uint8_t header[kHeaderSize];
    std::size_t pos = 0;

    // Copies a four-character chunk tag (without the terminating NUL).
    const auto putTag = [&](const char* tag) {
        for (int i = 0; i < 4; ++i) {
            header[pos++] = static_cast<std::uint8_t>(tag[i]);
        }
    };
    // Writes the low `count` bytes of `value`, least significant byte first.
    const auto putBytes = [&](std::uint32_t value, int count) {
        for (int i = 0; i < count; ++i) {
            header[pos++] = static_cast<std::uint8_t>((value >> (8 * i)) & 0xFFu);
        }
    };

    putTag("RIFF");
    putBytes(riffSize, 4);
    putTag("WAVE");

    putTag("fmt ");
    putBytes(kFmtBodySize, 4);
    putBytes(kFormatPcm, 2);
    putBytes(static_cast<std::uint32_t>(channels), 2);
    putBytes(static_cast<std::uint32_t>(sampleRate), 4);
    putBytes(static_cast<std::uint32_t>(byteRate), 4);
    putBytes(static_cast<std::uint32_t>(blockAlign), 2);
    putBytes(static_cast<std::uint32_t>(bitsPerSample), 2);

    putTag("data");
    putBytes(dataSize, 4);

    // One insertion: the payload is shifted a single time instead of once per chunk.
    audioData.insert(audioData.begin(), header, header + kHeaderSize);
}

#endif // !ADD_WAV_HEADER_H

功能比较小也比较常规,做个备忘,以免之后又要重复造轮子。