tap_shim.m (18023B)
// Hydra CoreAudio tap shim.
//
// The process-tap API (macOS 14.4+) centers on CATapDescription, an Objective-C class
// that the Rust `coreaudio-sys` bindings don't cover. Rather than reconstruct it through
// the obj-c runtime from Rust, we build the whole monitor route here against the real SDK
// headers — guaranteeing correct API usage — and expose a tiny C surface to Rust.
//
// A "monitor route" = tap one or more processes, fold the tap into a private aggregate
// device that also contains a hardware output, and run an IOProc that copies the tapped
// audio to that output with a live gain/mute. Per-buffer peak is written back for meters.
//
// Realtime contract: the IOProc reads `gain`/`muted` and writes `peak[]` on the audio
// thread. Rust owns the HydraParams allocation and accesses the same fields with volatile
// loads/stores. These are word-sized scalars where a torn read costs at most one stale
// buffer of gain — acceptable for a control parameter, and lock-free by construction.

#import <Foundation/Foundation.h>
#import <AppKit/AppKit.h>
#import <CoreAudio/CoreAudio.h>
#import <CoreAudio/CATapDescription.h>
#import <libproc.h>
#import <math.h>

// The process-tap entry points (macOS 14.4+) aren't pulled in by the CoreAudio umbrella
// header on every SDK, so declare them explicitly. Stable C ABI; resolved at link time
// against the CoreAudio framework.
extern OSStatus AudioHardwareCreateProcessTap(CATapDescription *inDescription, AudioObjectID *outTapID);
extern OSStatus AudioHardwareDestroyProcessTap(AudioObjectID inTapID);

// Copy the NUL-terminated UTF-8 string `src` into `dst` (capacity `cap` bytes, including
// the terminator). When `src` does not fit, truncate on a code-point boundary instead of
// wherever the byte budget runs out: a plain strlcpy can split a multi-byte sequence and
// hand the caller invalid UTF-8. `dst` is always NUL-terminated when cap > 0.
static void hydra_copy_utf8(char *dst, size_t cap, const char *src) {
    if (dst == NULL || cap == 0) return;
    dst[0] = '\0';
    if (src == NULL) return;

    size_t len = strlen(src);
    if (len >= cap) {
        len = cap - 1;
        // src[len] is the first byte we would drop. If it is a continuation byte
        // (10xxxxxx), the sequence it belongs to started earlier and would be cut in
        // half — back up until the first dropped byte starts a new code point.
        while (len > 0 && ((unsigned char)src[len] & 0xC0) == 0x80) len--;
    }
    memcpy(dst, src, len);
    dst[len] = '\0';
}

// Resolve a PID to a human-friendly app name + a "kind" rank for sorting/filtering.
// Writes a UTF-8 name into out_name (capacity name_cap, always NUL-terminated, truncated
// on a character boundary if it does not fit) and returns:
//   0 = regular foreground app (has a Dock presence — the apps users think of)
//   1 = accessory/background app with a known name (menu-bar agents etc.)
//   2 = plain process (name from the executable; system daemons, helpers)
// Prefers NSRunningApplication.localizedName; falls back to libproc's process name.
// Returns 2 without touching out_name when out_name is NULL or name_cap <= 0. When neither
// source yields a name, out_name is left as the empty string and 2 is returned.
int hydra_app_info(int pid, char *out_name, int name_cap) {
    if (!out_name || name_cap <= 0) return 2;
    out_name[0] = '\0';

    @autoreleasepool {
        NSRunningApplication *app =
            [NSRunningApplication runningApplicationWithProcessIdentifier:(pid_t)pid];
        // Messaging nil yields nil/0, so a missing app simply falls through to libproc.
        NSString *name = app.localizedName;
        const char *utf8 = (name.length > 0) ? name.UTF8String : NULL;
        if (utf8 != NULL) {
            hydra_copy_utf8(out_name, (size_t)name_cap, utf8);
            // Anything that is not a regular Dock app (accessory, prohibited, unknown)
            // is ranked as a background app.
            return (app.activationPolicy == NSApplicationActivationPolicyRegular) ? 0 : 1;
        }
    }

    // Fallback: executable name via libproc (no AppKit identity, e.g. CLI/helpers).
    char proc[PROC_PIDPATHINFO_MAXSIZE];
    if (proc_name(pid, proc, (uint32_t)sizeof(proc)) > 0 && proc[0] != '\0') {
        proc[sizeof(proc) - 1] = '\0';
        hydra_copy_utf8(out_name, (size_t)name_cap, proc);
    }
    return 2;
}

// Control/metering block shared between the audio thread (IOProc) and Rust. Rust mirrors
// this layout field-for-field, so fields must not be reordered, resized or removed.
typedef struct {
    float gain;                     // linear, read by IOProc
    int muted;                      // 0/1, read by IOProc
    float peak[8];                  // peak meter slots, written by IOProc; the render path
                                    // currently writes only peak[0] (max |sample| it output)
    int running;                    // 1 while the IOProc is installed
    unsigned long long callbacks;   // IOProc invocation count (liveness diagnostic)

    // ── Recording ring (SPSC): IOProc is the sole writer, the Rust drain thread the sole
    // reader. Lock-free + allocation-free on the audio thread — we only ever copy into a
    // pre-allocated buffer and bump an atomic write index. `rec_on` gates capture; on
    // overrun (reader too slow) we drop samples and bump rec_overruns rather than block.
    _Atomic int rec_on;                         // 1 while recording
    float *rec_buf;                             // ring storage (rec_cap floats), owned by Rust
    unsigned int rec_cap;                       // capacity in floats (must be > 0 when rec_on)
    unsigned int rec_channels;                  // channels the writer interleaves (set when armed)
    _Atomic unsigned long long rec_write;       // total floats written (monotonic; % rec_cap for pos)
    _Atomic unsigned long long rec_read;        // total floats consumed by the drain thread
    _Atomic unsigned long long rec_overruns;    // count of dropped floats (ring was full)

    // Tap format, published so Rust can write a correct WAV header. fmt_channels is set by
    // the IOProc on its first run (0 until the first callback fires); fmt_sample_rate is
    // set from the aggregate's nominal rate when the route is started.
    _Atomic unsigned int fmt_channels;
    _Atomic unsigned int fmt_sample_rate;
} HydraParams;

// Handles for one live route; filled in by the start functions and consumed by
// hydra_monitor_stop. `tap` is 0 for hardware-input routes (no process tap).
typedef struct {
    AudioObjectID tap;
    AudioObjectID aggregate;
    AudioDeviceIOProcID ioproc;
    HydraParams *params;
} HydraRoute;

// Exposed so Rust can assert its mirrored struct matches this compiler's layout.
size_t hydra_params_size(void) { return sizeof(HydraParams); }

// Returns the system default output device, or kAudioObjectUnknown if the HAL query fails.
static AudioObjectID hydra_default_output(void) {
    AudioObjectID dev = kAudioObjectUnknown;
    UInt32 sz = sizeof(dev);
    const AudioObjectPropertyAddress addr = {
        kAudioHardwarePropertyDefaultOutputDevice,
        kAudioObjectPropertyScopeGlobal,
        kAudioObjectPropertyElementMain
    };
    if (AudioObjectGetPropertyData(kAudioObjectSystemObject, &addr, 0, NULL, &sz, &dev) != noErr) {
        return kAudioObjectUnknown;
    }
    return dev;
}

// Translates a device UID (UTF-8 C string) to its AudioObjectID.
// Returns kAudioObjectUnknown when `uid` is NULL, is not valid UTF-8, or the HAL does not
// know the UID.
static AudioObjectID hydra_device_for_uid(const char *uid) {
    if (uid == NULL) return kAudioObjectUnknown;

    // CFStringCreateWithCString returns NULL for bytes that are not valid in the given
    // encoding; CFRelease(NULL) crashes, so bail out before touching it.
    CFStringRef cf = CFStringCreateWithCString(NULL, uid, kCFStringEncodingUTF8);
    if (cf == NULL) return kAudioObjectUnknown;

    AudioObjectID dev = kAudioObjectUnknown;
    UInt32 sz = sizeof(dev);
    const AudioObjectPropertyAddress addr = {
        kAudioHardwarePropertyTranslateUIDToDevice,
        kAudioObjectPropertyScopeGlobal,
        kAudioObjectPropertyElementMain
    };
    const OSStatus st =
        AudioObjectGetPropertyData(kAudioObjectSystemObject, &addr, sizeof(cf), &cf, &sz, &dev);
    CFRelease(cf);
    return (st == noErr) ? dev : kAudioObjectUnknown;
}

// Returns the device's UID, or nil on failure (including dev == kAudioObjectUnknown).
// The HAL hands back a +1 CFString; __bridge_transfer passes that ownership to ARC.
static NSString *hydra_device_uid(AudioObjectID dev) {
    if (dev == kAudioObjectUnknown) return nil;

    CFStringRef uid = NULL;
    UInt32 sz = sizeof(uid);
    const AudioObjectPropertyAddress addr = {
        kAudioDevicePropertyDeviceUID,
        kAudioObjectPropertyScopeGlobal,
        kAudioObjectPropertyElementMain
    };
    if (AudioObjectGetPropertyData(dev, &addr, 0, NULL, &sz, &uid) != noErr || uid == NULL) {
        return nil;
    }
    return (__bridge_transfer NSString *)uid;
}

// Resolves the UID of the requested output device: `outputUID` when it is a non-empty
// string, otherwise the system default output. Returns nil if no device can be resolved.
static NSString *hydra_output_uid(const char *outputUID) {
    const AudioObjectID dev = (outputUID && outputUID[0]) ? hydra_device_for_uid(outputUID)
                                                          : hydra_default_output();
    return hydra_device_uid(dev);
}

// The realtime IOProc body, shared by process-tap routes and hardware-input routes. They
// differ only in WHICH input buffer carries the source audio:
//   - process tap:    the LAST input buffer (CoreAudio appends the tap after sub-device streams)
//   - hardware input: the FIRST input buffer (the input device's own stream)
// `useLast` picks. Everything else — gain/mute, peak, the recording ring, fmt publish — is
// identical, so it lives here once. Must stay allocation/lock free (audio thread).
//
// Buffers are treated as interleaved 32-bit float. Every output buffer is zeroed, then
// filled with min(src, out) channels x min(src, out) frames of gained source audio.
static void hydra_render(HydraParams *P, const AudioBufferList *inData, AudioBufferList *outData, int useLast) {
    P->callbacks++;
    const float g = P->muted ? 0.0f : P->gain;
    const UInt32 nin = inData ? inData->mNumberBuffers : 0;
    const UInt32 nout = outData ? outData->mNumberBuffers : 0;

    const AudioBuffer *src = NULL;
    if (nin > 0) src = useLast ? &inData->mBuffers[nin - 1] : &inData->mBuffers[0];
    const float *srcData = (src && src->mData) ? (const float *)src->mData : NULL;
    const UInt32 srcCh = src ? src->mNumberChannels : 0;
    const UInt32 srcFrames = (srcData && srcCh) ? (src->mDataByteSize / sizeof(float) / srcCh) : 0;

    // Publish the source channel count once, the first time a source buffer is seen.
    if (srcCh && __c11_atomic_load(&P->fmt_channels, __ATOMIC_RELAXED) == 0) {
        __c11_atomic_store(&P->fmt_channels, srcCh, __ATOMIC_RELEASE);
    }

    // Recording: raw (pre-gain) source → SPSC ring; drop+count on overrun, never block.
    if (__c11_atomic_load(&P->rec_on, __ATOMIC_ACQUIRE) && srcData && P->rec_buf && P->rec_cap) {
        const unsigned int total = srcFrames * srcCh;
        const unsigned int cap = P->rec_cap;
        const unsigned long long w = __c11_atomic_load(&P->rec_write, __ATOMIC_RELAXED);
        const unsigned long long r = __c11_atomic_load(&P->rec_read, __ATOMIC_ACQUIRE);
        // Clamp instead of letting the subtraction wrap: if the indices are ever
        // inconsistent (used > cap) we treat the ring as full rather than trusting a
        // bogus free count, which keeps the memcpy below inside the buffer.
        const unsigned long long used = w - r;
        const unsigned int freeSpace = (used < cap) ? (unsigned int)(cap - used) : 0;
        if (total <= freeSpace) {
            // At most two contiguous copies: up to the end of the ring, then the wrap.
            const unsigned int pos = (unsigned int)(w % cap);
            const unsigned int tail = cap - pos;
            const unsigned int first = (total < tail) ? total : tail;
            memcpy(P->rec_buf + pos, srcData, (size_t)first * sizeof(float));
            if (first < total) {
                memcpy(P->rec_buf, srcData + first, (size_t)(total - first) * sizeof(float));
            }
            // Release so the reader sees the samples before it sees the new index.
            __c11_atomic_store(&P->rec_write, w + total, __ATOMIC_RELEASE);
        } else {
            __c11_atomic_fetch_add(&P->rec_overruns, total, __ATOMIC_RELAXED);
        }
    }

    float peak = 0.0f;
    for (UInt32 ob = 0; ob < nout; ob++) {
        AudioBuffer *dst = &outData->mBuffers[ob];
        float *out = (float *)dst->mData;
        if (!out) continue;
        const UInt32 outCh = dst->mNumberChannels;
        const UInt32 outFrames = outCh ? (dst->mDataByteSize / sizeof(float) / outCh) : 0;
        // Silence first so channels/frames the source does not cover stay quiet.
        memset(out, 0, dst->mDataByteSize);
        if (!srcData || srcCh == 0) continue;
        const UInt32 frames = outFrames < srcFrames ? outFrames : srcFrames;
        const UInt32 ch = outCh < srcCh ? outCh : srcCh;
        for (UInt32 f = 0; f < frames; f++) {
            const float *in = srcData + (size_t)f * srcCh;
            float *o = out + (size_t)f * outCh;
            for (UInt32 c = 0; c < ch; c++) {
                const float v = in[c] * g;
                o[c] = v;
                const float av = fabsf(v);
                if (av > peak) peak = av;
            }
        }
    }
    // Single post-gain peak across everything written this cycle; only slot 0 is used.
    P->peak[0] = peak;
}

// Read the aggregate's nominal sample rate into params (for the WAV header).
// Leaves fmt_sample_rate untouched if the query fails or reports a non-positive rate.
static void hydra_publish_rate(AudioObjectID agg, HydraParams *P) {
    Float64 sr = 0;
    UInt32 srSize = sizeof(sr);
    const AudioObjectPropertyAddress srAddr = {
        kAudioDevicePropertyNominalSampleRate,
        kAudioObjectPropertyScopeGlobal,
        kAudioObjectPropertyElementMain
    };
    if (AudioObjectGetPropertyData(agg, &srAddr, 0, NULL, &srSize, &sr) == noErr && sr > 0) {
        __c11_atomic_store(&P->fmt_sample_rate, (unsigned int)sr, __ATOMIC_RELEASE);
    }
}

// Install the shared render block on `agg`, publish the sample rate and start IO.
// On success stores the IOProc handle in *outProc, marks params as running and returns
// noErr. On failure the IOProc (if it was created) is destroyed again; the caller still
// owns — and must destroy — the aggregate and any tap.
static OSStatus hydra_start_io(AudioObjectID agg, HydraParams *P, int useLast,
                               AudioDeviceIOProcID *outProc) {
    AudioDeviceIOProcID procID = NULL;
    OSStatus st = AudioDeviceCreateIOProcIDWithBlock(&procID, agg, NULL,
        ^(const AudioTimeStamp *now, const AudioBufferList *inData,
          const AudioTimeStamp *inTime, AudioBufferList *outData,
          const AudioTimeStamp *outTime) {
            (void)now; (void)inTime; (void)outTime;
            hydra_render(P, inData, outData, useLast);
        });
    if (st != noErr || procID == NULL) {
        return st != noErr ? st : kAudioHardwareUnspecifiedError;
    }

    hydra_publish_rate(agg, P);

    st = AudioDeviceStart(agg, procID);
    if (st != noErr) {
        AudioDeviceDestroyIOProcID(agg, procID);
        return st;
    }

    P->running = 1;
    *outProc = procID;
    return noErr;
}

// Build a route capturing a hardware INPUT device (e.g. the MacBook mic) to an output.
// No process tap, no TCC: just an aggregate of [input, output] with the shared IOProc
// reading the input device's own stream (buffer 0). Returns an OSStatus:
//   kAudio_ParamError            — missing/empty inputUID, or NULL params/outRoute
//   kAudioHardwareBadDeviceError — input or output device could not be resolved
//   otherwise the failing CoreAudio call's status, or noErr on success.
// outRoute is written only on success.
OSStatus hydra_input_start(const char *inputUID,   // required: the source input device
                           const char *outputUID,  // nullable -> default output
                           HydraParams *params,
                           HydraRoute *outRoute) {
    if (!inputUID || !inputUID[0] || !params || !outRoute) return kAudio_ParamError;
    @autoreleasepool {
        NSString *inUID = hydra_device_uid(hydra_device_for_uid(inputUID));
        if (inUID == nil) return kAudioHardwareBadDeviceError;

        NSString *outUID = hydra_output_uid(outputUID);
        if (outUID == nil) return kAudioHardwareBadDeviceError;

        // Private aggregate: input device first (its stream is input buffer 0), output as the
        // clock master. No tap list.
        // NOTE(review): when input and output are the same device the UID is listed twice —
        // confirm the HAL accepts that.
        NSString *aggUID = [[NSUUID UUID] UUIDString];
        NSDictionary *description = @{
            @(kAudioAggregateDeviceNameKey): @"Hydra Input",
            @(kAudioAggregateDeviceUIDKey): aggUID,
            @(kAudioAggregateDeviceIsPrivateKey): @YES,
            @(kAudioAggregateDeviceMainSubDeviceKey): outUID,
            @(kAudioAggregateDeviceSubDeviceListKey): @[
                @{ @(kAudioSubDeviceUIDKey): inUID },
                @{ @(kAudioSubDeviceUIDKey): outUID },
            ],
        };
        AudioObjectID agg = 0;
        OSStatus st = AudioHardwareCreateAggregateDevice((__bridge CFDictionaryRef)description, &agg);
        if (st != noErr || agg == 0) return st != noErr ? st : kAudioHardwareUnspecifiedError;

        AudioDeviceIOProcID procID = NULL;
        st = hydra_start_io(agg, params, /*useLast=*/0, &procID);  // input device = first buffer
        if (st != noErr) {
            AudioHardwareDestroyAggregateDevice(agg);
            return st;
        }

        outRoute->tap = 0;  // no tap; teardown handles tap==0
        outRoute->aggregate = agg;
        outRoute->ioproc = procID;
        outRoute->params = params;
        return noErr;
    }
}

// Build tap + aggregate + IOProc and start it. Returns an OSStatus (noErr on success):
//   kAudio_ParamError            — NULL procObjs/params/outRoute or nProcs <= 0
//   kAudioHardwareBadDeviceError — output device could not be resolved
//   otherwise the failing CoreAudio call's status.
// Everything created so far is torn down again on failure; outRoute is written only on
// success.
OSStatus hydra_monitor_start(const AudioObjectID *procObjs,
                             int nProcs,
                             const char *outputUID,  // nullable -> default output
                             HydraParams *params,
                             HydraRoute *outRoute) {
    if (!procObjs || nProcs <= 0 || !params || !outRoute) {
        return kAudio_ParamError;
    }
    @autoreleasepool {
        // 1. Tap description over the requested processes (stereo mixdown).
        NSMutableArray<NSNumber *> *procs = [NSMutableArray arrayWithCapacity:(NSUInteger)nProcs];
        for (int i = 0; i < nProcs; i++) {
            [procs addObject:@(procObjs[i])];
        }
        CATapDescription *desc = [[CATapDescription alloc] initStereoMixdownOfProcesses:procs];
        desc.name = @"Hydra Tap";
        desc.UUID = [NSUUID UUID];
        desc.muteBehavior = CATapUnmuted;  // keep the app audible while we tap it

        // 2. Create the process tap.
        AudioObjectID tapID = 0;
        OSStatus st = AudioHardwareCreateProcessTap(desc, &tapID);
        if (st != noErr || tapID == 0) {
            return st != noErr ? st : kAudioHardwareUnspecifiedError;
        }

        // 3. Resolve the output device's UID for the aggregate's sub-device list.
        NSString *outUID = hydra_output_uid(outputUID);
        if (outUID == nil) {
            AudioHardwareDestroyProcessTap(tapID);
            return kAudioHardwareBadDeviceError;
        }

        // 4. Private aggregate: hardware output as sub-device, tap folded in as input.
        //    The aggregate dictionary keys are C-string #defines, so box them with @(...).
        NSString *aggUID = [[NSUUID UUID] UUIDString];
        NSDictionary *description = @{
            @(kAudioAggregateDeviceNameKey): @"Hydra Monitor",
            @(kAudioAggregateDeviceUIDKey): aggUID,
            @(kAudioAggregateDeviceIsPrivateKey): @YES,
            @(kAudioAggregateDeviceMainSubDeviceKey): outUID,
            @(kAudioAggregateDeviceSubDeviceListKey): @[
                @{ @(kAudioSubDeviceUIDKey): outUID }
            ],
            @(kAudioAggregateDeviceTapListKey): @[
                @{ @(kAudioSubTapUIDKey): desc.UUID.UUIDString }
            ],
            @(kAudioAggregateDeviceTapAutoStartKey): @YES,
        };

        AudioObjectID agg = 0;
        st = AudioHardwareCreateAggregateDevice((__bridge CFDictionaryRef)description, &agg);
        if (st != noErr || agg == 0) {
            AudioHardwareDestroyProcessTap(tapID);
            return st != noErr ? st : kAudioHardwareUnspecifiedError;
        }

        // 5. IOProc: the shared render reads the tap (LAST input buffer) → output.
        AudioDeviceIOProcID procID = NULL;
        st = hydra_start_io(agg, params, /*useLast=*/1, &procID);
        if (st != noErr) {
            // Aggregate before tap: the aggregate references the tap.
            AudioHardwareDestroyAggregateDevice(agg);
            AudioHardwareDestroyProcessTap(tapID);
            return st;
        }

        outRoute->tap = tapID;
        outRoute->aggregate = agg;
        outRoute->ioproc = procID;
        outRoute->params = params;
        return noErr;
    }
}

// Tear down in the order that keeps coreaudiod happy: stop IO, remove the IOProc, destroy
// the aggregate (which references the tap), then destroy the tap. Works for both tap and
// hardware-input routes (tap == 0 is skipped). Clears the route's handles afterwards, so a
// second call on the same route does nothing further to CoreAudio; NULL is a no-op.
void hydra_monitor_stop(HydraRoute *r) {
    if (!r) return;
    if (r->aggregate && r->ioproc) {
        AudioDeviceStop(r->aggregate, r->ioproc);
        AudioDeviceDestroyIOProcID(r->aggregate, r->ioproc);
    }
    if (r->aggregate) AudioHardwareDestroyAggregateDevice(r->aggregate);
    if (r->tap) AudioHardwareDestroyProcessTap(r->tap);
    if (r->params) r->params->running = 0;
    r->tap = 0;
    r->aggregate = 0;
    r->ioproc = NULL;
}