JDK14/Java14源码在线阅读

JDK14/Java14源码在线阅读 / hotspot / os / linux / osContainer_linux.cpp
/*
 * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include <string.h>
#include <math.h>
#include <errno.h>
#include "utilities/globalDefinitions.hpp"
#include "memory/allocation.hpp"
#include "runtime/globals.hpp"
#include "runtime/os.hpp"
#include "logging/log.hpp"
#include "osContainer_linux.hpp"

/*
 * PER_CPU_SHARES has been set to 1024 because CPU shares' quota
 * is commonly used in cloud frameworks like Kubernetes[1],
 * AWS[2] and Mesos[3] in a similar way. They spawn containers with
 * --cpu-shares option values scaled by PER_CPU_SHARES. Thus, we do
 * the inverse for determining the number of possible available
 * CPUs to the JVM inside a container. See JDK-8216366.
 *
 * [1] https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu
 *     In particular:
 *        When using Docker:
 *          The spec.containers[].resources.requests.cpu is converted to its core value, which is potentially
 *          fractional, and multiplied by 1024. The greater of this number or 2 is used as the value of the
 *          --cpu-shares flag in the docker run command.
 * [2] https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html
 * [3] https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/docker/docker.cpp#L648
 *     https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/slave/containerizer/mesos/isolators/cgroups/constants.hpp#L30
 */
#define PER_CPU_SHARES 1024

// Set to true by OSContainer::init() as soon as it starts running.
bool  OSContainer::_is_initialized   = false;
// Set to true only when OSContainer::init() runs all the way to its end.
bool  OSContainer::_is_containerized = false;
// Value returned by active_processor_count() while its cache has not expired.
int   OSContainer::_active_processor_count = 1;
// Threshold at or above which a memory limit read from cgroup files is
// reported as unlimited. Computed in OSContainer::init() as LONG_MAX
// rounded down to a multiple of the VM page size.
julong _unlimited_memory;

/*
 * Describes one cgroup controller as seen by this process: the root and
 * mount point taken from a mountinfo entry, and the directory derived
 * from them that holds the controller's interface files. It also carries
 * an expiry counter that callers use to cache values read from those files.
 */
class CgroupSubsystem: CHeapObj<mtInternal> {
 friend class OSContainer;


 private:
    /* os::elapsed_counter() value after which cached data counts as stale */
    volatile jlong _next_check_counter;

    /* mountinfo contents */
    char *_root;
    char *_mount_point;

    /* Constructed subsystem directory */
    char *_path;

 public:
    /*
     * Keeps private copies of root and mountpoint. The subsystem path is
     * unset (NULL) until set_subsystem_path() succeeds, and the expiry
     * counter starts at min_jlong so that no cached value is considered
     * fresh before set_cache_expiry_time() has been called.
     */
    CgroupSubsystem(char *root, char *mountpoint) {
      _root = os::strdup(root);
      _mount_point = os::strdup(mountpoint);
      _path = NULL;
      _next_check_counter = min_jlong;
    }

    /*
     * Set directory to subsystem specific files based
     * on the contents of the mountinfo and cgroup files.
     *
     * cgroup_path is this controller's path from the cgroup file.
     *
     *   root is "/"                       -> mount point + cgroup_path
     *                                        (mount point only when
     *                                        cgroup_path is "/" as well)
     *   root equals cgroup_path           -> mount point
     *   root is a prefix of cgroup_path   -> mount point + the part of
     *                                        cgroup_path that follows root
     *
     * In every other case, or when the combined path would not fit into
     * MAXPATHLEN, the current path is left untouched.
     */
    void set_subsystem_path(char *cgroup_path) {
      if (_root == NULL || cgroup_path == NULL) {
        return;
      }

      /* Part of cgroup_path to append to the mount point; NULL = no match */
      const char *suffix = NULL;

      if (strcmp(_root, "/") == 0) {
        suffix = (strcmp(cgroup_path, "/") != 0) ? cgroup_path : "";
      } else if (strcmp(_root, cgroup_path) == 0) {
        suffix = "";
      } else {
        /*
         * The prefix test has to be made against cgroup_path itself.
         * Comparing the result of strstr(cgroup_path, _root) with _root
         * can never succeed, because strstr() returns a pointer into its
         * first argument. Equality was ruled out above, so a match here
         * means cgroup_path is strictly longer than the root.
         */
        size_t root_len = strlen(_root);
        if (strncmp(cgroup_path, _root, root_len) == 0) {
          suffix = cgroup_path + root_len;
        }
      }

      if (suffix == NULL) {
        return;
      }

      char buf[MAXPATHLEN+1];
      strncpy(buf, _mount_point, MAXPATHLEN);
      buf[MAXPATHLEN-1] = '\0';
      size_t buflen = strlen(buf);
      if ((buflen + strlen(suffix)) > (size_t)(MAXPATHLEN-1)) {
        return;
      }
      strncat(buf, suffix, MAXPATHLEN-buflen);
      buf[MAXPATHLEN-1] = '\0';
      _path = os::strdup(buf);
    }

    /* Directory holding this controller's files, or NULL if never set */
    char *subsystem_path() { return _path; }

    /* True once os::elapsed_counter() has moved past the stored expiry */
    bool cache_has_expired() {
      return os::elapsed_counter() > _next_check_counter;
    }

    /* Mark cached data as valid for another 'timeout' counter ticks */
    void set_cache_expiry_time(jlong timeout) {
      _next_check_counter = os::elapsed_counter() + timeout;
    }
};

class CgroupMemorySubsystem: CgroupSubsystem {
 friend class OSContainer;

 private:
    /* Some container runtimes set limits via cgroup
     * hierarchy. If set to true consider also memory.stat
     * file if everything else seems unlimited */
    bool _uses_mem_hierarchy;
    volatile jlong _memory_limit_in_bytes;

 public:
    CgroupMemorySubsystem(char *root, char *mountpoint) : CgroupSubsystem::CgroupSubsystem(root, mountpoint) {
      _uses_mem_hierarchy = false;
      _memory_limit_in_bytes = -1;

    }

    bool is_hierarchical() { return _uses_mem_hierarchy; }
    void set_hierarchical(bool value) { _uses_mem_hierarchy = value; }

    jlong memory_limit_in_bytes() { return _memory_limit_in_bytes; }
    void set_memory_limit_in_bytes(jlong value) {
      _memory_limit_in_bytes = value;
      // max memory limit is unlikely to change, but we want to remain
      // responsive to configuration changes. A very short grace time
      // between re-read avoids excessive overhead during startup without
      // significantly reducing the VMs ability to promptly react to reduced
      // memory availability
      set_cache_expiry_time(OSCONTAINER_CACHE_TIMEOUT);
    }

};

// Handles for the cgroup controllers used throughout this file. They all
// start out as NULL and are assigned in OSContainer::init() while it
// parses /proc/self/mountinfo.
CgroupMemorySubsystem* memory = NULL;
CgroupSubsystem* cpuset = NULL;
CgroupSubsystem* cpu = NULL;
CgroupSubsystem* cpuacct = NULL;

// Alias for a C string pointer.
// NOTE(review): no use of this alias is visible in this part of the file;
// confirm where it is needed.
typedef char * cptr;

PRAGMA_DIAG_PUSH
PRAGMA_FORMAT_NONLITERAL_IGNORED
/*
 * Read one value from a file located in a subsystem's directory.
 *
 * c          subsystem whose path is used as the directory
 * filename   file name (with leading '/') appended to that path
 * matchline  NULL for a single-value file; otherwise only lines that
 *            contain this string are parsed
 * scan_fmt   sscanf format; one conversion when matchline is NULL, two
 *            (discarded prefix, then the value) otherwise
 * returnval  pointer that receives the parsed value
 *
 * Returns 0 as soon as a line has been parsed successfully, and
 * OSCONTAINER_ERROR if the subsystem or its path is missing, the file
 * name does not fit, the file cannot be opened, or no line yields a value.
 */
template <typename T> int subsystem_file_line_contents(CgroupSubsystem* c,
                                              const char *filename,
                                              const char *matchline,
                                              const char *scan_fmt,
                                              T returnval) {
  if (c == NULL) {
    log_debug(os, container)("subsystem_file_line_contents: CgroupSubsytem* is NULL");
    return OSCONTAINER_ERROR;
  }
  if (c->subsystem_path() == NULL) {
    log_debug(os, container)("subsystem_file_line_contents: subsystem path is NULL");
    return OSCONTAINER_ERROR;
  }

  // Build "<subsystem path><filename>".
  char file[MAXPATHLEN+1];
  strncpy(file, c->subsystem_path(), MAXPATHLEN);
  file[MAXPATHLEN-1] = '\0';
  const int filelen = strlen(file);
  if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) {
    log_debug(os, container)("File path too long %s, %s", file, filename);
    return OSCONTAINER_ERROR;
  }
  strncat(file, filename, MAXPATHLEN-filelen);
  log_trace(os, container)("Path to %s is %s", filename, file);

  FILE *fp = fopen(file, "r");
  if (fp == NULL) {
    log_debug(os, container)("Open of file %s failed, %s", file, os::strerror(errno));
    return OSCONTAINER_ERROR;
  }

  char line[MAXPATHLEN+1];
  char discard[MAXPATHLEN+1];
  bool parse_failed = false;

  while (fgets(line, MAXPATHLEN, fp) != NULL) {
    int expected;
    int matched;
    if (matchline == NULL) {
      // single-line file case
      expected = 1;
      matched = sscanf(line, scan_fmt, returnval);
    } else if (strstr(line, matchline) != NULL) {
      // multi-line file case: the matchline prefix is scanned and dropped
      expected = 2;
      matched = sscanf(line, scan_fmt, discard, returnval);
    } else {
      continue; // substring not found
    }

    if (matched == expected) {
      fclose(fp);
      return 0;
    }
    parse_failed = true;
    log_debug(os, container)("Type %s not found in file %s", scan_fmt, file);
  }

  if (!parse_failed) {
    log_debug(os, container)("Empty file %s", file);
  }
  fclose(fp);
  return OSCONTAINER_ERROR;
}
PRAGMA_DIAG_POP

/*
 * GET_CONTAINER_INFO
 *
 * Declares a local 'variable' of type 'return_type' in the enclosing
 * scope and fills it from the single-value file 'filename' of
 * 'subsystem', parsed with 'scan_fmt' (no matchline is used).
 *
 * Note that the macro contains a return statement: if the read fails,
 * the ENCLOSING function returns (return_type) OSCONTAINER_ERROR.
 * On success the value is logged at trace level using 'logstring'.
 */
#define GET_CONTAINER_INFO(return_type, subsystem, filename,              \
                           logstring, scan_fmt, variable)                 \
  return_type variable;                                                   \
{                                                                         \
  int err;                                                                \
  err = subsystem_file_line_contents(subsystem,                           \
                                     filename,                            \
                                     NULL,                                \
                                     scan_fmt,                            \
                                     &variable);                          \
  if (err != 0)                                                           \
    return (return_type) OSCONTAINER_ERROR;                               \
                                                                          \
  log_trace(os, container)(logstring, variable);                          \
}

/*
 * GET_CONTAINER_INFO_CPTR
 *
 * String flavour of GET_CONTAINER_INFO: declares a local character array
 * 'variable' of 'bufsize' bytes in the enclosing scope and passes the
 * array itself (not its address) as the scan target for 'filename'.
 *
 * Note that the macro contains a return statement: if the read fails,
 * the ENCLOSING function returns (return_type) NULL.
 * On success the value is logged at trace level using 'logstring'.
 */
#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename,         \
                               logstring, scan_fmt, variable, bufsize)    \
  char variable[bufsize];                                                 \
{                                                                         \
  int err;                                                                \
  err = subsystem_file_line_contents(subsystem,                           \
                                     filename,                            \
                                     NULL,                                \
                                     scan_fmt,                            \
                                     variable);                           \
  if (err != 0)                                                           \
    return (return_type) NULL;                                            \
                                                                          \
  log_trace(os, container)(logstring, variable);                          \
}

/*
 * GET_CONTAINER_INFO_LINE
 *
 * Multi-line flavour of GET_CONTAINER_INFO: declares a local 'variable'
 * of type 'return_type' in the enclosing scope and fills it from a line
 * of 'filename' that contains 'matchline', parsed with 'scan_fmt'.
 *
 * Note that the macro contains a return statement: if the read fails,
 * the ENCLOSING function returns (return_type) OSCONTAINER_ERROR.
 * On success the value is logged at trace level using 'logstring'.
 */
#define GET_CONTAINER_INFO_LINE(return_type, subsystem, filename,         \
                           matchline, logstring, scan_fmt, variable)      \
  return_type variable;                                                   \
{                                                                         \
  int err;                                                                \
  err = subsystem_file_line_contents(subsystem,                           \
                                filename,                                 \
                                matchline,                                \
                                scan_fmt,                                 \
                                &variable);                               \
  if (err != 0)                                                           \
    return (return_type) OSCONTAINER_ERROR;                               \
                                                                          \
  log_trace(os, container)(logstring, variable);                          \
}

/* init
 *
 * Initialize the container support and determine if
 * we are running under cgroup control.
 */
void OSContainer::init() {
  FILE *mntinfo = NULL;
  FILE *cgroup = NULL;
  char buf[MAXPATHLEN+1];
  char tmproot[MAXPATHLEN+1];
  char tmpmount[MAXPATHLEN+1];
  char *p;
  jlong mem_limit;

  assert(!_is_initialized, "Initializing OSContainer more than once");

  _is_initialized = true;
  _is_containerized = false;

  _unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size();

  log_trace(os, container)("OSContainer::init: Initializing Container Support");
  if (!UseContainerSupport) {
    log_trace(os, container)("Container Support not enabled");
    return;
  }

  /*
   * Find the cgroup mount point for memory and cpuset
   * by reading /proc/self/mountinfo
   *
   * Example for docker:
   * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
   *
   * Example for host:
   * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
   */
  mntinfo = fopen("/proc/self/mountinfo", "r");
  if (mntinfo == NULL) {
      log_debug(os, container)("Can't open /proc/self/mountinfo, %s",
                               os::strerror(errno));
      return;
  }

  while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
    char tmpcgroups[MAXPATHLEN+1];
    char *cptr = tmpcgroups;
    char *token;

    // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
    if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- cgroup %*s %s", tmproot, tmpmount, tmpcgroups) != 3) {
      continue;
    }
    while ((token = strsep(&cptr, ",")) != NULL) {
      if (strcmp(token, "memory") == 0) {
        memory = new CgroupMemorySubsystem(tmproot, tmpmount);
      } else if (strcmp(token, "cpuset") == 0) {
        cpuset = new CgroupSubsystem(tmproot, tmpmount);
      } else if (strcmp(token, "cpu") == 0) {
        cpu = new CgroupSubsystem(tmproot, tmpmount);
      } else if (strcmp(token, "cpuacct") == 0) {
        cpuacct= new CgroupSubsystem(tmproot, tmpmount);
      }
    }
  }

  fclose(mntinfo);

  if (memory == NULL) {
    log_debug(os, container)("Required cgroup memory subsystem not found");
    return;
  }
  if (cpuset == NULL) {
    log_debug(os, container)("Required cgroup cpuset subsystem not found");
    return;
  }
  if (cpu == NULL) {
    log_debug(os, container)("Required cgroup cpu subsystem not found");
    return;
  }
  if (cpuacct == NULL) {
    log_debug(os, container)("Required cgroup cpuacct subsystem not found");
    return;
  }

  /*
   * Read /proc/self/cgroup and map host mount point to
   * local one via /proc/self/mountinfo content above
   *
   * Docker example:
   * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
   *
   * Host example:
   * 5:memory:/user.slice
   *
   * Construct a path to the process specific memory and cpuset
   * cgroup directory.
   *
   * For a container running under Docker from memory example above
   * the paths would be:
   *
   * /sys/fs/cgroup/memory
   *
   * For a Host from memory example above the path would be:
   *
   * /sys/fs/cgroup/memory/user.slice
   *
   */
  cgroup = fopen("/proc/self/cgroup", "r");
  if (cgroup == NULL) {
    log_debug(os, container)("Can't open /proc/self/cgroup, %s",
                             os::strerror(errno));
    return;
  }

  while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) {
    char *controllers;
    char *token;
    char *base;

    /* Skip cgroup number */
    strsep(&p, ":");
    /* Get controllers and base */
    controllers = strsep(&p, ":");
    base = strsep(&p, "\n");

    if (controllers == NULL) {
      continue;
    }

    while ((token = strsep(&controllers, ",")) != NULL) {
      if (strcmp(token, "memory") == 0) {
        memory->set_subsystem_path(base);
        jlong hierarchy = uses_mem_hierarchy();
        if (hierarchy > 0) {
          memory->set_hierarchical(true);
        }
      } else if (strcmp(token, "cpuset") == 0) {
        cpuset->set_subsystem_path(base);
      } else if (strcmp(token, "cpu") == 0) {
        cpu->set_subsystem_path(base);
      } else if (strcmp(token, "cpuacct") == 0) {
        cpuacct->set_subsystem_path(base);
      }
    }
  }

  fclose(cgroup);

  // We need to update the amount of physical memory now that
  // command line arguments have been processed.
  if ((mem_limit = memory_limit_in_bytes()) > 0) {
    os::Linux::set_physical_memory(mem_limit);
    log_info(os, container)("Memory Limit is: " JLONG_FORMAT, mem_limit);
  }

  _is_containerized = true;

}

/* container_type
 *
 * return:
 *    "cgroupv1" when running containerized, NULL otherwise
 */
const char * OSContainer::container_type() {
  if (!is_containerized()) {
    return NULL;
  }
  return "cgroupv1";
}

/* uses_mem_hierarchy
 *
 * Return whether or not hierarchical cgroup accounting is being
 * done, as read from the memory.use_hierarchy file of the memory
 * subsystem.
 *
 * return:
 *    the value read from the file (a number > 0 if true), or
 *    OSCONTAINER_ERROR if the value could not be read
 */
jlong OSContainer::uses_mem_hierarchy() {
  // The macro declares use_hierarchy and returns OSCONTAINER_ERROR from
  // this function if the file cannot be read or parsed.
  GET_CONTAINER_INFO(jlong, memory, "/memory.use_hierarchy",
                    "Use Hierarchy is: " JLONG_FORMAT, JLONG_FORMAT, use_hierarchy);
  return use_hierarchy;
}


/* memory_limit_in_bytes
 *
 * Return the limit of available memory for this process. The value
 * is served from the memory subsystem's cache until that cache has
 * expired; only then are the container settings read again.
 *
 * return:
 *    memory limit in bytes or
 *    -1 for unlimited
 *    OSCONTAINER_ERROR for not supported
 */
jlong OSContainer::memory_limit_in_bytes() {
  if (memory->cache_has_expired()) {
    const jlong fresh_limit = read_memory_limit_in_bytes();
    // Storing the value also restarts the cache timer, so the container
    // settings are not re-read too often.
    memory->set_memory_limit_in_bytes(fresh_limit);
    return fresh_limit;
  }
  return memory->memory_limit_in_bytes();
}

/* read_memory_limit_in_bytes
 *
 * Read the memory limit from the cgroup files, bypassing the cache.
 * If memory.limit_in_bytes looks unlimited and hierarchical accounting
 * is in use, hierarchical_memory_limit from memory.stat is used instead.
 *
 * return:
 *    memory limit in bytes or
 *    -1 for unlimited
 *    OSCONTAINER_ERROR if a required file could not be read
 */
jlong OSContainer::read_memory_limit_in_bytes() {
  GET_CONTAINER_INFO(julong, memory, "/memory.limit_in_bytes",
                     "Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, memlimit);

  // A direct limit below the threshold is the answer.
  if (memlimit < _unlimited_memory) {
    return (jlong)memlimit;
  }

  log_trace(os, container)("Non-Hierarchical Memory Limit is: Unlimited");
  if (!memory->is_hierarchical()) {
    return (jlong)-1;
  }

  // A limit may still be imposed further up the hierarchy.
  const char* matchline = "hierarchical_memory_limit";
  const char* format = "%s " JULONG_FORMAT;
  GET_CONTAINER_INFO_LINE(julong, memory, "/memory.stat", matchline,
                         "Hierarchical Memory Limit is: " JULONG_FORMAT, format, hier_memlimit)
  if (hier_memlimit < _unlimited_memory) {
    return (jlong)hier_memlimit;
  }

  log_trace(os, container)("Hierarchical Memory Limit is: Unlimited");
  return (jlong)-1;
}

/* memory_and_swap_limit_in_bytes
 *
 * Read the combined memory and swap limit from the cgroup files.
 * If memory.memsw.limit_in_bytes looks unlimited and hierarchical
 * accounting is in use, hierarchical_memsw_limit from memory.stat is
 * used instead.
 *
 * return:
 *    memory and swap limit in bytes or
 *    -1 for unlimited
 *    OSCONTAINER_ERROR if a required file could not be read
 */
jlong OSContainer::memory_and_swap_limit_in_bytes() {
  GET_CONTAINER_INFO(julong, memory, "/memory.memsw.limit_in_bytes",
                     "Memory and Swap Limit is: " JULONG_FORMAT, JULONG_FORMAT, memswlimit);

  // A direct limit below the threshold is the answer.
  if (memswlimit < _unlimited_memory) {
    return (jlong)memswlimit;
  }

  log_trace(os, container)("Non-Hierarchical Memory and Swap Limit is: Unlimited");
  if (!memory->is_hierarchical()) {
    return (jlong)-1;
  }

  // A limit may still be imposed further up the hierarchy.
  const char* matchline = "hierarchical_memsw_limit";
  const char* format = "%s " JULONG_FORMAT;
  GET_CONTAINER_INFO_LINE(julong, memory, "/memory.stat", matchline,
                         "Hierarchical Memory and Swap Limit is : " JULONG_FORMAT, format, hier_memlimit)
  if (hier_memlimit < _unlimited_memory) {
    return (jlong)hier_memlimit;
  }

  log_trace(os, container)("Hierarchical Memory and Swap Limit is: Unlimited");
  return (jlong)-1;
}

/* memory_soft_limit_in_bytes
 *
 * Read the soft memory limit from memory.soft_limit_in_bytes.
 *
 * return:
 *    soft memory limit in bytes or
 *    -1 for unlimited
 *    OSCONTAINER_ERROR if the file could not be read
 */
jlong OSContainer::memory_soft_limit_in_bytes() {
  GET_CONTAINER_INFO(julong, memory, "/memory.soft_limit_in_bytes",
                     "Memory Soft Limit is: " JULONG_FORMAT, JULONG_FORMAT, memsoftlimit);

  if (memsoftlimit < _unlimited_memory) {
    return (jlong)memsoftlimit;
  }

  log_trace(os, container)("Memory Soft Limit is: Unlimited");
  return (jlong)-1;
}

/* memory_usage_in_bytes
 *
 * Return the amount of used memory, as read from the
 * memory.usage_in_bytes file of the memory subsystem. The value is
 * returned exactly as read; no "unlimited" mapping is applied.
 *
 * return:
 *    memory usage in bytes or
 *    OSCONTAINER_ERROR if the value could not be read
 */
jlong OSContainer::memory_usage_in_bytes() {
  // The macro declares memusage and returns OSCONTAINER_ERROR from this
  // function if the file cannot be read or parsed.
  GET_CONTAINER_INFO(jlong, memory, "/memory.usage_in_bytes",
                     "Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage);
  return memusage;
}

/* memory_max_usage_in_bytes
 *
 * Return the maximum amount of used memory, as read from the
 * memory.max_usage_in_bytes file of the memory subsystem.
 *
 * return:
 *    max memory usage in bytes or
 *    OSCONTAINER_ERROR if the value could not be read
 */
jlong OSContainer::memory_max_usage_in_bytes() {
  // The macro declares memmaxusage and returns OSCONTAINER_ERROR from
  // this function if the file cannot be read or parsed.
  GET_CONTAINER_INFO(jlong, memory, "/memory.max_usage_in_bytes",
                     "Maximum Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memmaxusage);
  return memmaxusage;
}

/* active_processor_count
 *
 * Calculate an appropriate number of active processors for the
 * VM to use based on these three inputs.
 *
 * cpu affinity
 * cgroup cpu quota & cpu period
 * cgroup cpu shares
 *
 * Algorithm:
 *
 * Determine the number of available CPUs from sched_getaffinity
 *
 * If user specified a quota (quota != -1), calculate the number of
 * required CPUs by dividing quota by period.
 *
 * If shares are in effect (shares != -1), calculate the number
 * of CPUs required for the shares by dividing the share value
 * by PER_CPU_SHARES.
 *
 * All results of division are rounded up to the next whole number.
 *
 * If neither shares nor quotas have been specified, return the
 * number of active processors in the system.
 *
 * If both shares and quotas have been specified, the results are
 * based on the flag PreferContainerQuotaForCPUCount.  If true,
 * return the quota value.  If false return the smallest value
 * between shares or quotas.
 *
 * If shares and/or quotas have been specified, the resulting number
 * returned will never exceed the number of active processors.
 *
 * return:
 *    number of CPUs
 */
int OSContainer::active_processor_count() {
  int quota_count = 0, share_count = 0;
  int cpu_count, limit_count;
  int result;

  // We use a cache with a timeout to avoid performing expensive
  // computations in the event this function is called frequently.
  // [See 8227006].
  if (!cpu->cache_has_expired()) {
    log_trace(os, container)("OSContainer::active_processor_count (cached): %d", OSContainer::_active_processor_count);
    return OSContainer::_active_processor_count;
  }

  cpu_count = limit_count = os::Linux::active_processor_count();
  int quota  = cpu_quota();
  int period = cpu_period();
  int share  = cpu_shares();

  if (quota > -1 && period > 0) {
    quota_count = ceilf((float)quota / (float)period);

/**代码未完, 请加载全部代码(NowJava.com).**/
展开阅读全文

关注时代Java

关注时代Java