hydra

Terminal replacement for Loopback — virtual audio devices and routing on macOS, from a ratatui TUI.
Log | Files | Refs | README | LICENSE

tap_shim.m (18023B)


      1 // Hydra CoreAudio tap shim.
      2 //
      3 // The process-tap API (macOS 14.4+) centers on CATapDescription, an Objective-C class
      4 // that the Rust `coreaudio-sys` bindings don't cover. Rather than reconstruct it through
      5 // the obj-c runtime from Rust, we build the whole monitor route here against the real SDK
      6 // headers — guaranteeing correct API usage — and expose a tiny C surface to Rust.
      7 //
      8 // A "monitor route" = tap one or more processes, fold the tap into a private aggregate
      9 // device that also contains a hardware output, and run an IOProc that copies the tapped
     10 // audio to that output with a live gain/mute. Per-buffer peak is written back for meters.
     11 //
     12 // Realtime contract: the IOProc reads `gain`/`muted` and writes `peak[]` on the audio
     13 // thread. Rust owns the HydraParams allocation and accesses the same fields with volatile
     14 // loads/stores. These are word-sized scalars where a torn read costs at most one stale
     15 // buffer of gain — acceptable for a control parameter, and lock-free by construction.
     16 
     17 #import <Foundation/Foundation.h>
     18 #import <AppKit/AppKit.h>
     19 #import <CoreAudio/CoreAudio.h>
     20 #import <CoreAudio/CATapDescription.h>
     21 #import <libproc.h>
     22 #import <math.h>
     23 
     24 // The process-tap entry points (macOS 14.4+) aren't pulled in by the CoreAudio umbrella
     25 // header on every SDK, so declare them explicitly. Stable C ABI; resolved at link time
     26 // against the CoreAudio framework.
     27 extern OSStatus AudioHardwareCreateProcessTap(CATapDescription *inDescription, AudioObjectID *outTapID);
     28 extern OSStatus AudioHardwareDestroyProcessTap(AudioObjectID inTapID);
     29 
     30 // Resolve a PID to a human-friendly app name + a "kind" rank for sorting/filtering.
     31 // Writes a UTF-8 name into out_name (capacity name_cap) and returns:
     32 //   0 = regular foreground app (has a Dock presence — the apps users think of)
     33 //   1 = accessory/background app with a known name (menu-bar agents etc.)
     34 //   2 = plain process (name from the executable; system daemons, helpers)
     35 // Prefers NSRunningApplication.localizedName; falls back to libproc's process name.
     36 int hydra_app_info(int pid, char *out_name, int name_cap) {
     37     if (!out_name || name_cap <= 0) return 2;
     38     out_name[0] = '\0';
     39     int kind = 2;
     40 
     41     @autoreleasepool {
     42         NSRunningApplication *app =
     43             [NSRunningApplication runningApplicationWithProcessIdentifier:(pid_t)pid];
     44         NSString *name = nil;
     45         if (app != nil) {
     46             name = app.localizedName;
     47             switch (app.activationPolicy) {
     48                 case NSApplicationActivationPolicyRegular:   kind = 0; break;  // Dock app
     49                 case NSApplicationActivationPolicyAccessory:  kind = 1; break; // menu-bar agent
     50                 default:                                      kind = 1; break;
     51             }
     52         }
     53         if (name != nil && name.length > 0) {
     54             strlcpy(out_name, name.UTF8String, (size_t)name_cap);
     55             return kind;
     56         }
     57     }
     58 
     59     // Fallback: executable name via libproc (no AppKit identity, e.g. CLI/helpers).
     60     char proc[PROC_PIDPATHINFO_MAXSIZE];
     61     if (proc_name(pid, proc, sizeof(proc)) > 0 && proc[0] != '\0') {
     62         strlcpy(out_name, proc, (size_t)name_cap);
     63     }
     64     return 2;
     65 }
     66 
     67 typedef struct {
     68     float          gain;       // linear, read by IOProc
     69     int            muted;      // 0/1, read by IOProc
     70     float          peak[8];    // per-channel peak, written by IOProc
     71     int            running;    // 1 while the IOProc is installed
     72     unsigned long long callbacks; // IOProc invocation count (liveness diagnostic)
     73 
     74     // ── Recording ring (SPSC): IOProc is the sole writer, the Rust drain thread the sole
     75     // reader. Lock-free + allocation-free on the audio thread — we only ever memcpy into a
     76     // pre-allocated buffer and bump an atomic write index. `rec_on` gates capture; on
     77     // overrun (reader too slow) we drop samples and bump rec_overruns rather than block.
     78     _Atomic int            rec_on;        // 1 while recording
     79     float                 *rec_buf;       // ring storage (rec_cap floats), owned by Rust
     80     unsigned int           rec_cap;       // capacity in floats (must be > 0 when rec_on)
     81     unsigned int           rec_channels;  // channels the writer interleaves (set when armed)
     82     _Atomic unsigned long long rec_write; // total floats written (monotonic; & cap for pos)
     83     _Atomic unsigned long long rec_read;  // total floats consumed by the drain thread
     84     _Atomic unsigned long long rec_overruns; // count of dropped floats (ring was full)
     85 
     86     // Tap format, published by the IOProc on its first run so Rust can write a correct WAV
     87     // header (0 until the first callback fires).
     88     _Atomic unsigned int   fmt_channels;
     89     _Atomic unsigned int   fmt_sample_rate;
     90 } HydraParams;
     91 
     92 typedef struct {
     93     AudioObjectID       tap;
     94     AudioObjectID       aggregate;
     95     AudioDeviceIOProcID ioproc;
     96     HydraParams        *params;
     97 } HydraRoute;
     98 
     99 // Exposed so Rust can assert its mirrored struct matches this compiler's layout.
    100 size_t hydra_params_size(void) { return sizeof(HydraParams); }
    101 
    102 static AudioObjectID hydra_default_output(void) {
    103     AudioObjectID dev = 0;
    104     UInt32 sz = sizeof(dev);
    105     AudioObjectPropertyAddress a = {
    106         kAudioHardwarePropertyDefaultOutputDevice,
    107         kAudioObjectPropertyScopeGlobal,
    108         kAudioObjectPropertyElementMain
    109     };
    110     AudioObjectGetPropertyData(kAudioObjectSystemObject, &a, 0, NULL, &sz, &dev);
    111     return dev;
    112 }
    113 
    114 static AudioObjectID hydra_device_for_uid(const char *uid) {
    115     CFStringRef cf = CFStringCreateWithCString(NULL, uid, kCFStringEncodingUTF8);
    116     AudioObjectID dev = kAudioObjectUnknown;
    117     UInt32 sz = sizeof(dev);
    118     AudioObjectPropertyAddress a = {
    119         kAudioHardwarePropertyTranslateUIDToDevice,
    120         kAudioObjectPropertyScopeGlobal,
    121         kAudioObjectPropertyElementMain
    122     };
    123     AudioObjectGetPropertyData(kAudioObjectSystemObject, &a, sizeof(cf), &cf, &sz, &dev);
    124     CFRelease(cf);
    125     return dev;
    126 }
    127 
    128 // Returns a +1 retained NSString (caller owns); nil on failure.
    129 static NSString *hydra_device_uid(AudioObjectID dev) {
    130     CFStringRef uid = NULL;
    131     UInt32 sz = sizeof(uid);
    132     AudioObjectPropertyAddress a = {
    133         kAudioDevicePropertyDeviceUID,
    134         kAudioObjectPropertyScopeGlobal,
    135         kAudioObjectPropertyElementMain
    136     };
    137     if (AudioObjectGetPropertyData(dev, &a, 0, NULL, &sz, &uid) != noErr || uid == NULL) {
    138         return nil;
    139     }
    140     return (__bridge_transfer NSString *)uid;
    141 }
    142 
    143 // The realtime IOProc body, shared by process-tap routes and hardware-input routes. They
    144 // differ only in WHICH input buffer carries the source audio:
    145 //   - process tap: the LAST input buffer (CoreAudio appends the tap after sub-device streams)
    146 //   - hardware input: the FIRST input buffer (the input device's own stream)
    147 // `useLast` picks. Everything else — gain/mute, peak, the recording ring, fmt publish — is
    148 // identical, so it lives here once. Must stay allocation/lock free (audio thread).
    149 static void hydra_render(HydraParams *P, const AudioBufferList *inData, AudioBufferList *outData, int useLast) {
    150     P->callbacks++;
    151     const float g = P->muted ? 0.0f : P->gain;
    152     const UInt32 nin = inData ? inData->mNumberBuffers : 0;
    153     const UInt32 nout = outData ? outData->mNumberBuffers : 0;
    154 
    155     const AudioBuffer *src = NULL;
    156     if (nin > 0) src = useLast ? &inData->mBuffers[nin - 1] : &inData->mBuffers[0];
    157     const float *srcData = (src && src->mData) ? (const float *)src->mData : NULL;
    158     const UInt32 srcCh = src ? src->mNumberChannels : 0;
    159     const UInt32 srcFrames = (srcData && srcCh) ? (src->mDataByteSize / sizeof(float) / srcCh) : 0;
    160 
    161     if (srcCh && __c11_atomic_load(&P->fmt_channels, __ATOMIC_RELAXED) == 0) {
    162         __c11_atomic_store(&P->fmt_channels, srcCh, __ATOMIC_RELEASE);
    163     }
    164 
    165     // Recording: raw (pre-gain) source → SPSC ring; drop+count on overrun, never block.
    166     if (__c11_atomic_load(&P->rec_on, __ATOMIC_ACQUIRE) && srcData && P->rec_buf && P->rec_cap) {
    167         const unsigned int total = srcFrames * srcCh;
    168         unsigned long long w = __c11_atomic_load(&P->rec_write, __ATOMIC_RELAXED);
    169         unsigned long long r = __c11_atomic_load(&P->rec_read, __ATOMIC_ACQUIRE);
    170         const unsigned int cap = P->rec_cap;
    171         unsigned int freeSpace = (unsigned int)(cap - (w - r));
    172         if (total <= freeSpace) {
    173             for (unsigned int i = 0; i < total; i++) P->rec_buf[(w + i) % cap] = srcData[i];
    174             __c11_atomic_store(&P->rec_write, w + total, __ATOMIC_RELEASE);
    175         } else {
    176             __c11_atomic_fetch_add(&P->rec_overruns, total, __ATOMIC_RELAXED);
    177         }
    178     }
    179 
    180     float peak = 0.0f;
    181     for (UInt32 ob = 0; ob < nout; ob++) {
    182         float *out = (float *)outData->mBuffers[ob].mData;
    183         if (!out) continue;
    184         const UInt32 outCh = outData->mBuffers[ob].mNumberChannels;
    185         const UInt32 outFrames = outCh ? (outData->mBuffers[ob].mDataByteSize / sizeof(float) / outCh) : 0;
    186         memset(out, 0, outData->mBuffers[ob].mDataByteSize);
    187         if (!srcData || srcCh == 0) continue;
    188         const UInt32 frames = outFrames < srcFrames ? outFrames : srcFrames;
    189         const UInt32 ch = outCh < srcCh ? outCh : srcCh;
    190         for (UInt32 f = 0; f < frames; f++) {
    191             for (UInt32 c = 0; c < ch; c++) {
    192                 float v = srcData[f * srcCh + c] * g;
    193                 out[f * outCh + c] = v;
    194                 float av = fabsf(v);
    195                 if (av > peak) peak = av;
    196             }
    197         }
    198     }
    199     P->peak[0] = peak;
    200 }
    201 
    202 // Read the aggregate's nominal sample rate into params (for the WAV header).
    203 static void hydra_publish_rate(AudioObjectID agg, HydraParams *P) {
    204     Float64 sr = 0;
    205     UInt32 srSize = sizeof(sr);
    206     AudioObjectPropertyAddress srAddr = {
    207         kAudioDevicePropertyNominalSampleRate, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMain
    208     };
    209     if (AudioObjectGetPropertyData(agg, &srAddr, 0, NULL, &srSize, &sr) == noErr && sr > 0) {
    210         __c11_atomic_store(&P->fmt_sample_rate, (unsigned int)sr, __ATOMIC_RELEASE);
    211     }
    212 }
    213 
    214 // Build a route capturing a hardware INPUT device (e.g. the MacBook mic) to an output.
    215 // No process tap, no TCC: just an aggregate of [input, output] with the shared IOProc
    216 // reading the input device's own stream (buffer 0). Returns an OSStatus.
    217 OSStatus hydra_input_start(const char  *inputUID,   // required: the source input device
    218                            const char  *outputUID,  // nullable -> default output
    219                            HydraParams *params,
    220                            HydraRoute  *outRoute) {
    221     if (!inputUID || !inputUID[0] || !params || !outRoute) return kAudio_ParamError;
    222     @autoreleasepool {
    223         AudioObjectID inDev = hydra_device_for_uid(inputUID);
    224         NSString *inUID = hydra_device_uid(inDev);
    225         if (inUID == nil) return kAudioHardwareBadDeviceError;
    226 
    227         AudioObjectID outDev = (outputUID && outputUID[0]) ? hydra_device_for_uid(outputUID)
    228                                                            : hydra_default_output();
    229         NSString *outUID = hydra_device_uid(outDev);
    230         if (outUID == nil) return kAudioHardwareBadDeviceError;
    231 
    232         // Private aggregate: input device first (its stream is input buffer 0), output as the
    233         // clock master. No tap list.
    234         NSString *aggUID = [[NSUUID UUID] UUIDString];
    235         NSDictionary *description = @{
    236             @(kAudioAggregateDeviceNameKey):          @"Hydra Input",
    237             @(kAudioAggregateDeviceUIDKey):           aggUID,
    238             @(kAudioAggregateDeviceIsPrivateKey):     @YES,
    239             @(kAudioAggregateDeviceMainSubDeviceKey): outUID,
    240             @(kAudioAggregateDeviceSubDeviceListKey): @[
    241                 @{ @(kAudioSubDeviceUIDKey): inUID },
    242                 @{ @(kAudioSubDeviceUIDKey): outUID },
    243             ],
    244         };
    245         AudioObjectID agg = 0;
    246         OSStatus st = AudioHardwareCreateAggregateDevice((__bridge CFDictionaryRef)description, &agg);
    247         if (st != noErr || agg == 0) return st != noErr ? st : kAudioHardwareUnspecifiedError;
    248 
    249         HydraParams *P = params;
    250         AudioDeviceIOProcID procID = NULL;
    251         st = AudioDeviceCreateIOProcIDWithBlock(&procID, agg, NULL,
    252             ^(const AudioTimeStamp *now, const AudioBufferList *inD, const AudioTimeStamp *inT,
    253               AudioBufferList *outD, const AudioTimeStamp *outT) {
    254                 (void)now; (void)inT; (void)outT;
    255                 hydra_render(P, inD, outD, /*useLast=*/0); // input device = first buffer
    256             });
    257         if (st != noErr || procID == NULL) {
    258             AudioHardwareDestroyAggregateDevice(agg);
    259             return st != noErr ? st : kAudioHardwareUnspecifiedError;
    260         }
    261         hydra_publish_rate(agg, P);
    262         st = AudioDeviceStart(agg, procID);
    263         if (st != noErr) {
    264             AudioDeviceDestroyIOProcID(agg, procID);
    265             AudioHardwareDestroyAggregateDevice(agg);
    266             return st;
    267         }
    268         P->running = 1;
    269         outRoute->tap = 0; // no tap; teardown handles tap==0
    270         outRoute->aggregate = agg;
    271         outRoute->ioproc = procID;
    272         outRoute->params = P;
    273         return noErr;
    274     }
    275 }
    276 
    277 // Build tap + aggregate + IOProc and start it. Returns an OSStatus (noErr on success).
    278 OSStatus hydra_monitor_start(const AudioObjectID *procObjs,
    279                              int                  nProcs,
    280                              const char          *outputUID, // nullable -> default output
    281                              HydraParams         *params,
    282                              HydraRoute          *outRoute) {
    283     if (!procObjs || nProcs <= 0 || !params || !outRoute) {
    284         return kAudio_ParamError;
    285     }
    286     @autoreleasepool {
    287         // 1. Tap description over the requested processes (stereo mixdown).
    288         NSMutableArray<NSNumber *> *procs = [NSMutableArray arrayWithCapacity:nProcs];
    289         for (int i = 0; i < nProcs; i++) {
    290             [procs addObject:@(procObjs[i])];
    291         }
    292         CATapDescription *desc = [[CATapDescription alloc] initStereoMixdownOfProcesses:procs];
    293         desc.name = @"Hydra Tap";
    294         desc.UUID = [NSUUID UUID];
    295         desc.muteBehavior = CATapUnmuted; // keep the app audible while we tap it
    296 
    297         // 2. Create the process tap.
    298         AudioObjectID tapID = 0;
    299         OSStatus st = AudioHardwareCreateProcessTap(desc, &tapID);
    300         if (st != noErr || tapID == 0) {
    301             return st != noErr ? st : kAudioHardwareUnspecifiedError;
    302         }
    303 
    304         // 3. Resolve the output device + its UID for the aggregate's sub-device list.
    305         AudioObjectID outDev = (outputUID && outputUID[0]) ? hydra_device_for_uid(outputUID)
    306                                                            : hydra_default_output();
    307         NSString *outUID = hydra_device_uid(outDev);
    308         if (outUID == nil) {
    309             AudioHardwareDestroyProcessTap(tapID);
    310             return kAudioHardwareBadDeviceError;
    311         }
    312 
    313         // 4. Private aggregate: hardware output as sub-device, tap folded in as input.
    314         //    The aggregate dictionary keys are C-string #defines, so box them with @(...).
    315         NSString *aggUID = [[NSUUID UUID] UUIDString];
    316         NSDictionary *description = @{
    317             @(kAudioAggregateDeviceNameKey):          @"Hydra Monitor",
    318             @(kAudioAggregateDeviceUIDKey):           aggUID,
    319             @(kAudioAggregateDeviceIsPrivateKey):     @YES,
    320             @(kAudioAggregateDeviceMainSubDeviceKey): outUID,
    321             @(kAudioAggregateDeviceSubDeviceListKey): @[
    322                 @{ @(kAudioSubDeviceUIDKey): outUID }
    323             ],
    324             @(kAudioAggregateDeviceTapListKey): @[
    325                 @{ @(kAudioSubTapUIDKey): desc.UUID.UUIDString }
    326             ],
    327             @(kAudioAggregateDeviceTapAutoStartKey): @YES,
    328         };
    329 
    330         AudioObjectID agg = 0;
    331         st = AudioHardwareCreateAggregateDevice((__bridge CFDictionaryRef)description, &agg);
    332         if (st != noErr || agg == 0) {
    333             AudioHardwareDestroyProcessTap(tapID);
    334             return st != noErr ? st : kAudioHardwareUnspecifiedError;
    335         }
    336 
    337         // 5. IOProc: the shared render reads the tap (LAST input buffer) → output.
    338         HydraParams *P = params;
    339         AudioDeviceIOProcID procID = NULL;
    340         st = AudioDeviceCreateIOProcIDWithBlock(&procID, agg, NULL,
    341             ^(const AudioTimeStamp *now, const AudioBufferList *inData,
    342               const AudioTimeStamp *inTime, AudioBufferList *outData,
    343               const AudioTimeStamp *outTime) {
    344                 (void)now; (void)inTime; (void)outTime;
    345                 hydra_render(P, inData, outData, /*useLast=*/1); // tap = last input buffer
    346             });
    347         if (st != noErr || procID == NULL) {
    348             AudioHardwareDestroyAggregateDevice(agg);
    349             AudioHardwareDestroyProcessTap(tapID);
    350             return st != noErr ? st : kAudioHardwareUnspecifiedError;
    351         }
    352 
    353         hydra_publish_rate(agg, P);
    354 
    355         st = AudioDeviceStart(agg, procID);
    356         if (st != noErr) {
    357             AudioDeviceDestroyIOProcID(agg, procID);
    358             AudioHardwareDestroyAggregateDevice(agg);
    359             AudioHardwareDestroyProcessTap(tapID);
    360             return st;
    361         }
    362 
    363         P->running = 1;
    364         outRoute->tap = tapID;
    365         outRoute->aggregate = agg;
    366         outRoute->ioproc = procID;
    367         outRoute->params = P;
    368         return noErr;
    369     }
    370 }
    371 
    372 // Tear down in the order that keeps coreaudiod happy: stop IO, remove the IOProc, destroy
    373 // the aggregate (which references the tap), then destroy the tap.
    374 void hydra_monitor_stop(HydraRoute *r) {
    375     if (!r) return;
    376     if (r->aggregate && r->ioproc) {
    377         AudioDeviceStop(r->aggregate, r->ioproc);
    378         AudioDeviceDestroyIOProcID(r->aggregate, r->ioproc);
    379     }
    380     if (r->aggregate) AudioHardwareDestroyAggregateDevice(r->aggregate);
    381     if (r->tap)       AudioHardwareDestroyProcessTap(r->tap);
    382     if (r->params)    r->params->running = 0;
    383     r->tap = 0;
    384     r->aggregate = 0;
    385     r->ioproc = NULL;
    386 }