使用 Windows Core Audio APIs 进行 Loopback Recording 并生成 WAV 文件

2022-10-23 15:28 CST

参考文档

COM Coding Practices Audio File Format Specifications Core Audio APIs Loopback Recording

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
#include <iostream>
#include <fstream>
#include <vector>

#include <mmdeviceapi.h>
#include <combaseapi.h>
#include <atlbase.h>
#include <Functiondiscoverykeys_devpkey.h>
#include <Audioclient.h>
#include <Audiopolicy.h>

// 利用RAII手法,自动调用 CoUninitialize
class CoInitializeGuard {
public:
    CoInitializeGuard()
    {
        _hr = CoInitializeEx(nullptr, COINIT::COINIT_MULTITHREADED);
    }

    ~CoInitializeGuard()
    {
        if (_hr == S_OK || _hr == S_FALSE) {
            CoUninitialize();
        }
    }

    HRESULT result() const { return _hr; }

private:
    HRESULT _hr;
};

constexpr inline void exit_on_failed(HRESULT hr);
void printEndpoints(CComPtr<IMMDeviceCollection> pColletion);
std::string wchars_to_mbs(const wchar_t* s);

int main()
{
    HRESULT hr{};

    CoInitializeGuard coInitializeGuard;
    exit_on_failed(coInitializeGuard.result());

    // COM 对象都用 CComPtr 包装,会自动调用 Release
    // COM 接口分配的堆变量用 CComHeapPtr 包装,会自动调用 CoTaskMemFree
    CComPtr<IMMDeviceEnumerator> pEnumerator;
    hr = pEnumerator.CoCreateInstance(__uuidof(MMDeviceEnumerator));
    exit_on_failed(hr);

    // 打印所有可用的音频设备
    //CComPtr<IMMDeviceCollection> pColletion;
    //hr = pEnumerator->EnumAudioEndpoints(eRender, DEVICE_STATE_ACTIVE, &pColletion);
    //exit_on_failed(hr);
    //printEndpoints(pColletion);

    // 使用默认的 Audio Endpoint,eRender 表示音频播放设备,而不是录音设备
    CComPtr<IMMDevice> pEndpoint;
    hr = pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pEndpoint);
    exit_on_failed(hr);

    // 打印出播放设备的名字,可能包含中文
    CComPtr<IPropertyStore> pProps;
    hr = pEndpoint->OpenPropertyStore(STGM_READ, &pProps);
    exit_on_failed(hr);
    PROPVARIANT varName;
    PropVariantInit(&varName);
    hr = pProps->GetValue(PKEY_Device_FriendlyName, &varName);
    exit_on_failed(hr);
    std::cout << "select audio endpoint: " << wchars_to_mbs(varName.pwszVal) << std::endl;
    PropVariantClear(&varName);

    // 由 IMMDevice 对象 得到 IAudioClient 对象 
    CComPtr<IAudioClient> pAudioClient;
    hr = pEndpoint->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr, (void**)&pAudioClient);
    exit_on_failed(hr);

    // 获得音频播放设备格式信息
    CComHeapPtr<WAVEFORMATEX> pDeviceFormat;
    pAudioClient->GetMixFormat(&pDeviceFormat);

    constexpr int REFTIMES_PER_SEC = 10000000;      // 1 reference_time = 100ns
    constexpr int REFTIMES_PER_MILLISEC = 10000;

    // 初始化 IAudioClient 对象
    const REFERENCE_TIME hnsRequestedDuration = 2 * REFTIMES_PER_SEC; // 1s
    hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, hnsRequestedDuration, 0, pDeviceFormat, nullptr);
    exit_on_failed(hr);

    // 获得缓冲区大小
    UINT32 bufferFrameCount{};
    hr = pAudioClient->GetBufferSize(&bufferFrameCount);
    exit_on_failed(hr);

    // 由 IAudioClient 对象 得到 IAudioCaptureClient 对象,也就是将音频播放设备视为录音设备
    CComPtr<IAudioCaptureClient> pCaptureClient;
    hr = pAudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&pCaptureClient);
    exit_on_failed(hr);
    
    // 开始录音
    hr = pAudioClient->Start();
    exit_on_failed(hr);

    const REFERENCE_TIME hnsActualDuration = (long long)REFTIMES_PER_SEC * bufferFrameCount / pDeviceFormat->nSamplesPerSec;

    std::ofstream ofile("./out.wav", std::ios::binary);
    if (!ofile) {
        exit(-1);
    }

    // 写入各种 header 信息

    constexpr UINT32 sizePlaceholder{};
    // master RIFF chunk
    ofile.write("RIFF", 4);
    ofile.write((const char*)&sizePlaceholder, 4);
    ofile.write("WAVE", 4);
    // 12

    // fmt chunk
    ofile.write("fmt ", 4);
    UINT32 fmt_ckSize = sizeof(WAVEFORMATEX) + pDeviceFormat->cbSize;
    ofile.write((const char*)&fmt_ckSize, 4);
    {
        auto p = pDeviceFormat.Detach();
        ofile.write((const char*)p, fmt_ckSize);
        pDeviceFormat.Attach(p);
    }
    // 8 + fmt_ckSize

    // fact chunk
    bool has_fact_chunt = pDeviceFormat->wFormatTag != WAVE_FORMAT_PCM;
    if (has_fact_chunt) {
        ofile.write("fact", 4);
        UINT32 fact_ckSize = 4;
        ofile.write((const char*)&fact_ckSize, 4);
        DWORD dwSampleLength{};
        ofile.write((const char*)&dwSampleLength, 4);
    }
    // 12

    // data chunk
    ofile.write("data", 4);
    ofile.write((const char*)&sizePlaceholder, 4);

    UINT32 data_ckSize = 0; // samples data 的大小
    UINT32 frame_count = 0; // 帧数

    constexpr int max_duration = 60;    // 录制 60s
    int seconds{};                      // 已经录制的时间

    time_t t_begin = time(NULL);

    //UINT32 
    do {
        // 睡眠一定时间,防止CPU占用率高
        Sleep(9);

        BYTE* pData{};                  // samples 数据
        UINT32 numFramesAvailable{};    // 缓冲区有多少帧
        DWORD dwFlags{};

        hr = pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &dwFlags, NULL, NULL);
        exit_on_failed(hr);

        int frame_bytes = pDeviceFormat->nChannels * pDeviceFormat->wBitsPerSample / 8;
        int count = numFramesAvailable * frame_bytes;
        ofile.write((const char*)pData, count);
        data_ckSize += count;
        frame_count += numFramesAvailable;
        seconds = frame_count / pDeviceFormat->nSamplesPerSec;
        std::cout << "numFramesAvailable: " << numFramesAvailable << " seconds: " << seconds << std::endl;

        hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
        exit_on_failed(hr);

    } while (seconds < max_duration);

    // 检测实际花了多久,实际时间 - max_duration = 延迟
    time_t t_end = time(NULL);
    std::cout << "use wall clock: " << t_end - t_begin << "s" << std::endl;

    if (data_ckSize % 2) {
        ofile.put(0);
        ++data_ckSize;
    }

    UINT32 wave_ckSize = 4 + (8 + fmt_ckSize) + (8 + data_ckSize);
    ofile.seekp(4);
    ofile.write((const char*)&wave_ckSize, 4);

    if (has_fact_chunt) {
        ofile.seekp(12 + (8 + fmt_ckSize) + 8);
        ofile.write((const char*)&frame_count, 4);
    }

    ofile.seekp(12 + (8 + fmt_ckSize) + 12 + 4);
    ofile.write((const char*)&data_ckSize, 4);

    ofile.close();

    //所有 COM 对象和 Heap 都会自动释放
}

void printEndpoints(CComPtr<IMMDeviceCollection> pColletion)
{
    HRESULT hr{};

    UINT count{};
    hr = pColletion->GetCount(&count);
    exit_on_failed(hr);

    for (UINT i = 0; i < count; ++i) {
        CComPtr<IMMDevice> pEndpoint;
        hr = pColletion->Item(i, &pEndpoint);
        exit_on_failed(hr);

        CComHeapPtr<WCHAR> pwszID;
        hr = pEndpoint->GetId(&pwszID);
        exit_on_failed(hr);

        CComPtr<IPropertyStore> pProps;
        hr = pEndpoint->OpenPropertyStore(STGM_READ, &pProps);
        exit_on_failed(hr);

        PROPVARIANT varName;
        PropVariantInit(&varName);
        hr = pProps->GetValue(PKEY_Device_FriendlyName, &varName);
        exit_on_failed(hr);

        std::cout << wchars_to_mbs(varName.pwszVal) << std::endl;

        PropVariantClear(&varName);
    }
}

constexpr inline void exit_on_failed(HRESULT hr) {
    if (FAILED(hr)) {
        exit(-1);
    }
}

// 汉字会有编码问题,全部转成窄字符
std::string wchars_to_mbs(const wchar_t* src)
{
    UINT cp = GetACP();
    int ccWideChar = (int)wcslen(src);
    int n = WideCharToMultiByte(cp, 0, src, ccWideChar, 0, 0, 0, 0);

    std::vector<char> buf(n);
    WideCharToMultiByte(cp, 0, src, ccWideChar, buf.data(), (int)buf.size(), 0, 0);
    std::string dst(buf.data(), buf.size());
    return dst;
}

使用 Windows Core Audio APIs 进行 Loopback Recording 并生成 WAV 文件 by mkckr0 is licensed under CC BY-NC-ND 4.0