Is getgrgid_r or getgrnam_r failing on you inexplicably?

Take this code, which on Linux accepts a group name as input on the command line & outputs the associated gid:

#include <sys/types.h>
#include <grp.h>

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#include <unistd.h>

/*
 * Print the numeric gid of the group named on the command line.
 *
 * Usage: <program> <group>
 *
 * Returns 0 when the group was found and its gid printed; returns 1 on a
 * usage error, allocation failure, lookup failure, or unknown group.
 *
 * NOTE: this version makes a single getgrnam_r() call with a buffer of the
 * size suggested by sysconf(_SC_GETGR_R_SIZE_MAX).  That value is only a
 * hint, so the lookup fails (ERANGE) for groups with many members.
 */
int
main (int argc, char **argv)
{
  /* Used when sysconf() has no suggestion to offer. */
  static const long fallback_size = 1024;

  const char *name;
  struct group groupbuf;
  struct group *group = NULL;
  char *buf;
  long size;
  int err;
  int status = 1;

  if (argc < 2) {
    /* argv[0] may be NULL when argc == 0; never pass NULL to %s. */
    fprintf (stderr, "usage: %s <group>\n",
      (argc > 0 && argv[0] != NULL) ? argv[0] : "getgid");
    return 1;
  }
  name = argv[1];

  /* -1 means "no suggested size", not an error; fall back to a default. */
  size = sysconf (_SC_GETGR_R_SIZE_MAX);
  if (size <= 0) {
    size = fallback_size;
  }

  buf = malloc ((size_t) size);
  if (buf == NULL) {
    fprintf (stderr, "error: malloc() failed\n");
    return 1;
  }

  /*
   * getgrnam_r() reports failure through its return value (an errno-style
   * code); errno itself is not guaranteed to be set.  A return of 0 with
   * group == NULL means "no such group".
   */
  err = getgrnam_r (name, &groupbuf, buf, (size_t) size, &group);
  if (err != 0) {
    fprintf (stderr,
      "error: getgrnam_r failed with errno=%d\n", err);
  } else if (group == NULL) {
    fprintf (stderr, "error: group not found: %s\n", name);
  } else {
    printf ("%u\n", (unsigned int) group->gr_gid);
    status = 0;
  }

  /* group's strings live inside buf, so release it only after printing. */
  free (buf);
  return status;
}

Sane enough, right? Yet if you run this code on a system against a group with, say, a thousand members, you’ll find that the getgrnam_r call fails.

It turns out that _SC_GETGR_R_SIZE_MAX is not the maximum size of the input buffer. If you look at the declaration of struct group, you can probably guess why:

/* Group entry filled in by getgrnam_r()/getgrgid_r().  */
struct group
  {
    char *gr_name;      /* Group name.  */
    char *gr_passwd;        /* Password.    */
    __gid_t gr_gid;     /* Group ID.    */
    /* One user name per group member, so the space a lookup needs
       grows with the size of the group.  */
    char **gr_mem;      /* Member list. */
  };

See that last field, gr_mem? That will be populated with the username of every member of this group. So if you have a lot of users in a group, that code up there will fail miserably with the 1k or so that _SC_GETGR_R_SIZE_MAX yields.

Turns out that the value associated with _SC_GETGR_R_SIZE_MAX is merely “an initial suggested size for buf” according to the man page for getgrgid_r. It doesn’t help that there are older versions of the man page out there that don’t call this out explicitly.

So how do you handle getgr{nam,gid}_r calls properly for large groups? Why, you check for ERANGE (which these functions report through their return value, not necessarily errno) & loop to try again with a bigger buffer. Obviously.

#include <sys/types.h>
#include <grp.h>

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#include <unistd.h>

/*
 * Print the numeric gid of the group named on the command line, growing
 * the lookup buffer as needed so that groups with many members work.
 *
 * Usage: <program> <group>
 *
 * The buffer starts at the size suggested by
 * sysconf(_SC_GETGR_R_SIZE_MAX) and grows by that same amount each time
 * getgrnam_r() reports ERANGE, up to max_size bytes.
 *
 * Returns 0 when the group was found and its gid printed; returns 1 on a
 * usage error, allocation failure, lookup failure, buffer limit, or
 * unknown group.
 */
int
main (int argc, char **argv)
{
  static const long max_size = 16 * 1024;
  /* Used when sysconf() has no suggestion to offer. */
  static const long fallback_size = 1024;

  const char *name;
  struct group groupbuf;
  struct group *group = NULL;
  char *temp;
  char *buf;
  long size;
  long realsize;
  int err;
  int status = 1;

  if (argc < 2) {
    /* argv[0] may be NULL when argc == 0; never pass NULL to %s. */
    fprintf (stderr, "usage: %s <group>\n",
      (argc > 0 && argv[0] != NULL) ? argv[0] : "getgid");
    return 1;
  }
  name = argv[1];

  /* -1 means "no suggested size", not an error; fall back to a default. */
  size = sysconf (_SC_GETGR_R_SIZE_MAX);
  if (size <= 0) {
    size = fallback_size;
  }
  realsize = size;

  buf = malloc ((size_t) size);
  if (buf == NULL) {
    fprintf (stderr, "error: malloc() failed\n");
    return 1;
  }

  for (;;) {
    /*
     * getgrnam_r() reports failure through its return value (an
     * errno-style code); errno itself is not guaranteed to be set.
     */
    err = getgrnam_r (name, &groupbuf, buf, (size_t) size, &group);
    if (err != ERANGE) {
      break;
    }

    /* Written as a subtraction so the check itself cannot overflow. */
    if (size > max_size - realsize) {
      fprintf (stderr, "error: buffer limit reached\n");
      goto out;
    }

    /* grow the buffer by 'realsize' each time getgrnam_r fails */
    size += realsize;
    temp = realloc (buf, (size_t) size);
    if (temp == NULL) {
      /* buf is still valid here and is released at 'out'. */
      fprintf (stderr, "error: realloc() failed\n");
      goto out;
    }
    buf = temp;
  }

  if (err != 0) {
    fprintf (stderr,
       "error: getgrnam_r failed with errno=%d\n", err);
  } else if (group == NULL) {
    /* Return of 0 with a NULL result means "no such group". */
    fprintf (stderr, "error: group not found: %s\n", name);
  } else {
    printf ("%u\n", (unsigned int) group->gr_gid);
    status = 0;
  }

out:
  /* group's strings live inside buf, so release it only after printing. */
  free (buf);
  return status;
}