# -*- encoding: utf-8 -*-
"""
Local server.
`H2OLocalServer` allows to start H2O servers on your local machine:
hs = H2OLocalServer.start() : start a new local server
hs.is_running() : check if the server is running
hs.shutdown() : shut down the server
:copyright: (c) 2016 H2O.ai
:license: Apache License Version 2.0 (see LICENSE for details)
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import atexit
import os
import subprocess
import sys
import tempfile
import time
from random import choice
from sysconfig import get_config_var
from warnings import warn
from h2o.exceptions import H2OServerError, H2OStartupError, H2OValueError
from h2o.utils.compatibility import * # NOQA
from h2o.utils.typechecks import assert_is_type, assert_satisfies, BoundInt, I, is_type
__all__ = ("H2OLocalServer", )
[docs]class H2OLocalServer(object):
"""
Handle to an H2O server launched locally.
Public interface::
hs = H2OLocalServer.start(...) # launch a new local H2O server
hs.is_running() # check if the server is running
hs.shutdown() # shut down the server
hs.scheme # either "http" or "https"
hs.ip # ip address of the server, typically "127.0.0.1"
hs.port # port on which the server is listening
Once started, the server will run until the script terminates, or until you call `.shutdown()` on it. Moreover,
if the server terminates with an exception, then the server will not stop and will continue to run even after
Python process exits. This runaway process may end up being in a bad shape (e.g. frozen), then the only way to
terminate it is to kill the java process from the terminal.
Alternatively, it is possible to start the server as a context manager, in which case it will be automatically
shut down even if an exception occurs in Python (but not if the Python process is killed)::
with H2OLocalServer.start() as hs:
# do something with the server -- probably connect to it
"""
## (Avkash) Changing the maximum time to wait as 60 seconds to match with the same time in to R API. ##
_TIME_TO_START = 60 # Maximum time we wait for the server to start up (in seconds)
_TIME_TO_KILL = 3 # Maximum time we wait for the server to shut down until we kill it (in seconds)
[docs] @staticmethod
def start(jar_path=None, nthreads=-1, enable_assertions=True, max_mem_size=None, min_mem_size=None,
ice_root=None, port="54321+", extra_classpath=None, verbose=True):
"""
Start new H2O server on the local machine.
:param jar_path: Path to the h2o.jar executable. If not given, then we will search for h2o.jar in the
locations returned by `._jar_paths()`.
:param nthreads: Number of threads in the thread pool. This should be related to the number of CPUs used.
-1 means use all CPUs on the host. A positive integer specifies the number of CPUs directly.
:param enable_assertions: If True, pass `-ea` option to the JVM.
:param max_mem_size: Maximum heap size (jvm option Xmx), in bytes.
:param min_mem_size: Minimum heap size (jvm option Xms), in bytes.
:param ice_root: A directory where H2O stores its temporary files. Default location is determined by
tempfile.mkdtemp().
:param port: Port where to start the new server. This could be either an integer, or a string of the form
"DDDDD+", indicating that the server should start looking for an open port starting from DDDDD and up.
:param extra_classpath List of paths to libraries that should be included on the Java classpath.
:param verbose: If True, then connection info will be printed to the stdout.
:returns: a new H2OLocalServer instance
"""
assert_is_type(jar_path, None, str)
assert_is_type(port, None, int, str)
assert_is_type(nthreads, -1, BoundInt(1, 4096))
assert_is_type(enable_assertions, bool)
assert_is_type(min_mem_size, None, int)
assert_is_type(max_mem_size, None, BoundInt(1 << 25))
assert_is_type(ice_root, None, I(str, os.path.isdir))
assert_is_type(extra_classpath, None, [str])
if jar_path:
assert_satisfies(jar_path, jar_path.endswith("h2o.jar"))
if min_mem_size is not None and max_mem_size is not None and min_mem_size > max_mem_size:
raise H2OValueError("`min_mem_size`=%d is larger than the `max_mem_size`=%d" % (min_mem_size, max_mem_size))
if port is None: port = "54321+"
baseport = None
# TODO: get rid of this port gimmick and have 2 separate parameters.
if is_type(port, str):
if port.isdigit():
port = int(port)
else:
if not(port[-1] == "+" and port[:-1].isdigit()):
raise H2OValueError("`port` should be of the form 'DDDD+', where D is a digit. Got: %s" % port)
baseport = int(port[:-1])
port = 0
hs = H2OLocalServer()
hs._verbose = bool(verbose)
hs._jar_path = hs._find_jar(jar_path)
hs._extra_classpath = extra_classpath
hs._ice_root = ice_root
if not ice_root:
hs._ice_root = tempfile.mkdtemp()
hs._tempdir = hs._ice_root
if verbose: print("Attempting to start a local H2O server...")
hs._launch_server(port=port, baseport=baseport, nthreads=int(nthreads), ea=enable_assertions,
mmax=max_mem_size, mmin=min_mem_size)
if verbose: print(" Server is running at %s://%s:%d" % (hs.scheme, hs.ip, hs.port))
atexit.register(lambda: hs.shutdown())
return hs
[docs] def is_running(self):
"""Return True if the server process is still running, False otherwise."""
return self._process is not None and self._process.poll() is None
[docs] def shutdown(self):
"""
Shut down the server by trying to terminate/kill its process.
First we attempt to terminate the server process gracefully (sending SIGTERM signal). However after
_TIME_TO_KILL seconds if the process didn't shutdown, we forcefully kill it with a SIGKILL signal.
"""
if not self._process: return
try:
kill_time = time.time() + self._TIME_TO_KILL
while self._process.poll() is None and time.time() < kill_time:
self._process.terminate()
time.sleep(0.2)
if self._process().poll() is None:
self._process.kill()
time.sleep(0.2)
if self._verbose:
print("Local H2O server %s:%s stopped." % (self.ip, self.port))
except:
pass
self._process = None
@property
def scheme(self):
"""Connection scheme, 'http' or 'https'."""
return self._scheme
@property
def ip(self):
"""IP address of the server."""
return self._ip
@property
def port(self):
"""Port that the server is listening to."""
return self._port
#-------------------------------------------------------------------------------------------------------------------
# Private
#-------------------------------------------------------------------------------------------------------------------
def __init__(self):
"""[Internal] please use H2OLocalServer.start() to launch a new server."""
self._scheme = None # "http" or "https"
self._ip = None
self._port = None
self._process = None
self._verbose = None
self._jar_path = None
self._extra_classpath = None
self._ice_root = None
self._stdout = None
self._stderr = None
self._tempdir = None
def _find_jar(self, path0=None):
"""
Return the location of an h2o.jar executable.
:param path0: Explicitly given h2o.jar path. If provided, then we will simply check whether the file is there,
otherwise we will search for an executable in locations returned by ._jar_paths().
:raises H2OStartupError: if no h2o.jar executable can be found.
"""
jar_paths = [path0] if path0 else self._jar_paths()
searched_paths = []
for jp in jar_paths:
searched_paths.append(jp)
if os.path.exists(jp):
return jp
raise H2OStartupError("Cannot start local server: h2o.jar not found. Paths searched:\n" +
"".join(" %s\n" % s for s in searched_paths))
@staticmethod
def _jar_paths():
"""Produce potential paths for an h2o.jar executable."""
# PUBDEV-3534 hook to use arbitrary h2o.jar
own_jar = os.getenv("H2O_JAR_PATH", "")
if own_jar != "":
if not os.path.isfile(own_jar):
raise H2OStartupError("Environment variable H2O_JAR_PATH is set to '%d' but file does not exists, unset environment variable or provide valid path to h2o.jar file." % own_jar)
yield own_jar
# Check if running from an h2o-3 src folder (or any subfolder), in which case use the freshly-built h2o.jar
cwd_chunks = os.path.abspath(".").split(os.path.sep)
for i in range(len(cwd_chunks), 0, -1):
if cwd_chunks[i - 1] == "h2o-3":
yield os.path.sep.join(cwd_chunks[:i] + ["build", "h2o.jar"])
# Then check the backend/bin folder:
# (the following works assuming this code is located in h2o/backend/server.py file)
backend_dir = os.path.split(os.path.realpath(__file__))[0]
yield os.path.join(backend_dir, "bin", "h2o.jar")
# Then try several old locations where h2o.jar might have been installed
prefix1 = prefix2 = sys.prefix
# On Unix-like systems Python typically gets installed into /Library/... or /System/Library/... If one of
# those paths is sys.prefix, then we also build its counterpart.
if prefix1.startswith(os.path.sep + "Library"):
prefix2 = os.path.join("", "System", prefix1)
elif prefix1.startswith(os.path.sep + "System"):
prefix2 = prefix1[len(os.path.join("", "System")):]
yield os.path.join(prefix1, "h2o_jar", "h2o.jar")
yield os.path.join(os.path.abspath(os.sep), "usr", "local", "h2o_jar", "h2o.jar")
yield os.path.join(prefix1, "local", "h2o_jar", "h2o.jar")
yield os.path.join(get_config_var("userbase"), "h2o_jar", "h2o.jar")
yield os.path.join(prefix2, "h2o_jar", "h2o.jar")
def _launch_server(self, port, baseport, mmax, mmin, ea, nthreads):
"""Actually start the h2o.jar executable (helper method for `.start()`)."""
self._ip = "127.0.0.1"
# Find Java and check version. (Note that subprocess.check_output returns the output as a bytes object)
java = self._find_java()
self._check_java(java, self._verbose)
if self._verbose:
print(" Starting server from " + self._jar_path)
print(" Ice root: " + self._ice_root)
# Combine jar path with the optional extra classpath
classpath = [self._jar_path] if self._extra_classpath is None else [self._jar_path] + self._extra_classpath
# Construct java command to launch the process
cmd = [java]
# ...add JVM options
cmd += ["-ea"] if ea else []
for (mq, num) in [("-Xms", mmin), ("-Xmx", mmax)]:
if num is None: continue
numstr = "%dG" % (num >> 30) if num == (num >> 30) << 30 else \
"%dM" % (num >> 20) if num == (num >> 20) << 20 else \
str(num)
cmd += [mq + numstr]
cmd += ["-verbose:gc", "-XX:+PrintGCDetails", "-XX:+PrintGCTimeStamps"]
cmd += ["-cp", os.pathsep.join(classpath), "water.H2OApp"] # This should be the last JVM option
# ...add H2O options
cmd += ["-ip", self._ip]
cmd += ["-web_ip", self._ip]
cmd += ["-port", str(port)] if port else []
cmd += ["-baseport", str(baseport)] if baseport else []
cmd += ["-ice_root", self._ice_root]
cmd += ["-nthreads", str(nthreads)] if nthreads > 0 else []
cmd += ["-name", "H2O_from_python_%s" % self._tmp_file("salt")]
# Warning: do not change to any higher log-level, otherwise we won't be able to know which port the
# server is listening to.
cmd += ["-log_level", "INFO"]
# Create stdout and stderr files
self._stdout = self._tmp_file("stdout")
self._stderr = self._tmp_file("stderr")
cwd = os.path.abspath(os.getcwd())
out = open(self._stdout, "w")
err = open(self._stderr, "w")
if self._verbose:
print(" JVM stdout: " + out.name)
print(" JVM stderr: " + err.name)
# Launch the process
win32 = sys.platform == "win32"
flags = getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0) if win32 else 0
prex = os.setsid if not win32 else None
try:
proc = subprocess.Popen(args=cmd, stdout=out, stderr=err, cwd=cwd, creationflags=flags, preexec_fn=prex)
except OSError as e:
traceback = getattr(e, "child_traceback", None)
raise H2OServerError("Unable to start server: %s" % e, traceback)
# Wait until the server is up-and-running
giveup_time = time.time() + self._TIME_TO_START
while True:
if proc.poll() is not None:
raise H2OServerError("Server process terminated with error code %d" % proc.returncode)
ret = self._get_server_info_from_logs()
if ret:
self._scheme = ret[0]
self._ip = ret[1]
self._port = ret[2]
self._process = proc
break
if time.time() > giveup_time:
elapsed_time = time.time() - (giveup_time - self._TIME_TO_START)
raise H2OServerError("Server wasn't able to start in %f seconds." % elapsed_time)
time.sleep(0.2)
@staticmethod
def _check_java(java, verbose):
jver_bytes = subprocess.check_output([java, "-version"], stderr=subprocess.STDOUT)
jver = jver_bytes.decode(encoding="utf-8", errors="ignore")
if verbose:
print(" Java Version: " + jver.strip().replace("\n", "; "))
if "GNU libgcj" in jver:
raise H2OStartupError("Sorry, GNU Java is not supported for H2O.\n"
"Please download the latest 64-bit Java SE JDK from Oracle.")
if "Client VM" in jver:
warn(" You have a 32-bit version of Java. H2O works best with 64-bit Java.\n"
" Please download the latest 64-bit Java SE JDK from Oracle.\n")
@staticmethod
def _find_java():
"""
Find location of the java executable (helper for `._launch_server()`).
This method is not particularly robust, and may require additional tweaking for different platforms...
:return: Path to the java executable.
:raises H2OStartupError: if java cannot be found.
"""
# is java callable directly (doesn't work on windows it seems)?
java = "java.exe" if sys.platform == "win32" else "java"
if os.access(java, os.X_OK):
return java
# Can Java be found on the PATH?
for path in os.getenv("PATH").split(os.pathsep): # not same as os.path.sep!
full_path = os.path.join(path, java)
if os.access(full_path, os.X_OK):
return full_path
# check if JAVA_HOME is set (for Windows)
if os.getenv("JAVA_HOME"):
full_path = os.path.join(os.getenv("JAVA_HOME"), "bin", java)
if os.path.exists(full_path):
return full_path
# check "/Program Files" and "/Program Files (x86)" on Windows
if sys.platform == "win32":
# On Windows, backslash on the drive letter is necessary, otherwise os.path.join produces an invalid path
program_folders = [os.path.join("C:\\", "Program Files", "Java"),
os.path.join("C:\\", "Program Files (x86)", "Java"),
os.path.join("C:\\", "ProgramData", "Oracle", "Java")]
for folder in program_folders:
for dirpath, dirnames, filenames in os.walk(folder):
if java in filenames:
return os.path.join(dirpath, java)
# not found...
raise H2OStartupError("Cannot find Java. Please install the latest JRE from\n"
"http://www.oracle.com/technetwork/java/javase/downloads/index.html")
def _tmp_file(self, kind):
"""
Generate names for temporary files (helper method for `._launch_server()`).
:param kind: one of "stdout", "stderr" or "salt". The "salt" kind is used for process name, not for a
file, so it doesn't contain a path. All generated names are based on the user name of the currently
logged-in user.
"""
if sys.platform == "win32":
username = os.getenv("USERNAME")
else:
username = os.getenv("USER")
if not username:
username = "unknownUser"
usr = "".join(ch if ch.isalnum() else "_" for ch in username)
if kind == "salt":
return usr + "_" + "".join(choice("0123456789abcdefghijklmnopqrstuvwxyz") for _ in range(6))
else:
if not self._tempdir:
self._tempdir = tempfile.mkdtemp()
return os.path.join(self._tempdir, "h2o_%s_started_from_python.%s" % (usr, kind[3:]))
def _get_server_info_from_logs(self):
"""
Check server's output log, and determine its scheme / IP / port (helper method for `._launch_server()`).
This method is polled during process startup. It looks at the server output log and checks for a presence of
a particular string ("INFO: Open H2O Flow in your web browser:") which indicates that the server is
up-and-running. If the method detects this string, it extracts the server's scheme, ip and port and returns
them; otherwise it returns None.
:returns: (scheme, ip, port) tuple if the server has already started, None otherwise.
"""
searchstr = "INFO: Open H2O Flow in your web browser:"
with open(self._stdout, "rt") as f:
for line in f:
if searchstr in line:
url = line[line.index(searchstr) + len(searchstr):].strip().rstrip("/")
parts = url.split(":")
assert len(parts) == 3 and (parts[0] == "http" or parts[1] == "https") and parts[2].isdigit(), \
"Unexpected URL: %s" % url
return parts[0], parts[1][2:], int(parts[2])
return None
def __enter__(self):
return self
def __exit__(self, *args):
self.shutdown()
assert len(args) == 3 # Avoid warning about unused args...
return False # ensure that any exception will be re-raised
# Do not stop child process when the object is garbage collected!
# This ensures that simple code such as
# for _ in range(5):
# h2o.H2OConnection.start()
# will launch 5 servers, and they will not be closed down immediately (only when the program exits).