Adam Richie-Halford, University of Washington Dept of Physics
Ariel Rokem, University of Washington eScience Institute
Follow along at http://richford.github.io/scipy2018-cloudknot-talk/
Pros:
Cons:
Reap the benefits of AWS from the comfort of our Python env
Have an adventure without leaving The Shire
Other platforms have sought to lower the AWS entry barrier
Pros:
Cons:
Executor
from Python's concurrent.futures
Define the user-defined function (UDF).
import cloudknot as ck
def monte_pi_count(n):
    """Count random points that fall inside the unit quarter-circle.

    Draws ``n`` points uniformly from the unit square and returns how many
    satisfy ``x**2 + y**2 <= 1``.
    """
    # Prerequisites are imported inside the UDF itself.
    import numpy as np
    x = np.random.rand(n)
    y = np.random.rand(n)
    return np.count_nonzero(x * x + y * y <= 1.0)
N.B. we import prerequisites inside the UDF.
import cloudknot as ck
def monte_pi_count(n):
    """Count random points that fall inside the unit quarter-circle.

    Draws ``n`` points uniformly from the unit square and returns how many
    satisfy ``x**2 + y**2 <= 1``.
    """
    # Prerequisites are imported inside the UDF itself.
    import numpy as np
    x = np.random.rand(n)
    y = np.random.rand(n)
    return np.count_nonzero(x * x + y * y <= 1.0)
Instantiate a Knot, creating resources on AWS.
import cloudknot as ck
def monte_pi_count(n):
    """Count random points that fall inside the unit quarter-circle.

    Draws ``n`` points uniformly from the unit square and returns how many
    satisfy ``x**2 + y**2 <= 1``.
    """
    # Prerequisites are imported inside the UDF itself.
    import numpy as np
    x = np.random.rand(n)
    y = np.random.rand(n)
    return np.count_nonzero(x * x + y * y <= 1.0)
# Instantiate the Knot for monte_pi_count; per the talk, this creates resources on AWS.
knot = ck.Knot(name='pi-calc', func=monte_pi_count)
Submit jobs with the map()
method.
import cloudknot as ck
def monte_pi_count(n):
    """Count random points that fall inside the unit quarter-circle.

    Draws ``n`` points uniformly from the unit square and returns how many
    satisfy ``x**2 + y**2 <= 1``.
    """
    # Prerequisites are imported inside the UDF itself.
    import numpy as np
    x = np.random.rand(n)
    y = np.random.rand(n)
    return np.count_nonzero(x * x + y * y <= 1.0)
# Instantiate the Knot for monte_pi_count; per the talk, this creates resources on AWS.
knot = ck.Knot(name='pi-calc', func=monte_pi_count)
# 1000 jobs, each drawing 100 million sample points.
n_jobs, n_samples = 1000, 100000000
import numpy as np
# One argument per job: every job receives the same sample count.
args = np.ones(n_jobs, dtype=np.int32) * n_samples
# Submit the jobs, one per element of args.
future = knot.map(args)
Summarize the status of submitted jobs.
import cloudknot as ck
def monte_pi_count(n):
    """Count random points that fall inside the unit quarter-circle.

    Draws ``n`` points uniformly from the unit square and returns how many
    satisfy ``x**2 + y**2 <= 1``.
    """
    # Prerequisites are imported inside the UDF itself.
    import numpy as np
    x = np.random.rand(n)
    y = np.random.rand(n)
    return np.count_nonzero(x * x + y * y <= 1.0)
# Instantiate the Knot for monte_pi_count; per the talk, this creates resources on AWS.
knot = ck.Knot(name='pi-calc', func=monte_pi_count)
# 1000 jobs, each drawing 100 million sample points.
n_jobs, n_samples = 1000, 100000000
import numpy as np
# One argument per job: every job receives the same sample count.
args = np.ones(n_jobs, dtype=np.int32) * n_samples
# Submit the jobs, one per element of args.
future = knot.map(args)
# Summarize the status of the submitted jobs (job ID, name, status).
knot.view_jobs()
[out]: Job ID Name Status
----------------------------------------
fcd2a14b... pi-calc-0 PENDING
Query the result status.
import cloudknot as ck
def monte_pi_count(n):
    """Count random points that fall inside the unit quarter-circle.

    Draws ``n`` points uniformly from the unit square and returns how many
    satisfy ``x**2 + y**2 <= 1``.
    """
    # Prerequisites are imported inside the UDF itself.
    import numpy as np
    x = np.random.rand(n)
    y = np.random.rand(n)
    return np.count_nonzero(x * x + y * y <= 1.0)
# Instantiate the Knot for monte_pi_count; per the talk, this creates resources on AWS.
knot = ck.Knot(name='pi-calc', func=monte_pi_count)
# 1000 jobs, each drawing 100 million sample points.
n_jobs, n_samples = 1000, 100000000
import numpy as np
# One argument per job: every job receives the same sample count.
args = np.ones(n_jobs, dtype=np.int32) * n_samples
# Submit the jobs, one per element of args.
future = knot.map(args)
# Summarize the status of the submitted jobs (job ID, name, status).
knot.view_jobs()
# Query the result status.
done_yet = future.done()
Retrieve the result.
import cloudknot as ck
def monte_pi_count(n):
    """Count random points that fall inside the unit quarter-circle.

    Draws ``n`` points uniformly from the unit square and returns how many
    satisfy ``x**2 + y**2 <= 1``.
    """
    # Prerequisites are imported inside the UDF itself.
    import numpy as np
    x = np.random.rand(n)
    y = np.random.rand(n)
    return np.count_nonzero(x * x + y * y <= 1.0)
# Instantiate the Knot for monte_pi_count; per the talk, this creates resources on AWS.
knot = ck.Knot(name='pi-calc', func=monte_pi_count)
# 1000 jobs, each drawing 100 million sample points.
n_jobs, n_samples = 1000, 100000000
import numpy as np
# One argument per job: every job receives the same sample count.
args = np.ones(n_jobs, dtype=np.int32) * n_samples
# Submit the jobs, one per element of args.
future = knot.map(args)
# Summarize the status of the submitted jobs (job ID, name, status).
knot.view_jobs()
# Query the result status.
done_yet = future.done()
# Retrieve the result.
res = future.result()
Or retrieve previously submitted results.
import cloudknot as ck
def monte_pi_count(n):
    """Count random points that fall inside the unit quarter-circle.

    Draws ``n`` points uniformly from the unit square and returns how many
    satisfy ``x**2 + y**2 <= 1``.
    """
    # Prerequisites are imported inside the UDF itself.
    import numpy as np
    x = np.random.rand(n)
    y = np.random.rand(n)
    return np.count_nonzero(x * x + y * y <= 1.0)
# Instantiate the Knot for monte_pi_count; per the talk, this creates resources on AWS.
knot = ck.Knot(name='pi-calc', func=monte_pi_count)
# 1000 jobs, each drawing 100 million sample points.
n_jobs, n_samples = 1000, 100000000
import numpy as np
# One argument per job: every job receives the same sample count.
args = np.ones(n_jobs, dtype=np.int32) * n_samples
# Submit the jobs, one per element of args.
future = knot.map(args)
# Summarize the status of the submitted jobs (job ID, name, status).
knot.view_jobs()
# Query the result status.
done_yet = future.done()
# Retrieve the result.
res = future.result()
# Or retrieve it through the last entry in the knot's job list.
res = knot.jobs[-1].result() # Equivalent to future.result()
Or add a callback to the final result
import cloudknot as ck
def monte_pi_count(n):
    """Count random points that fall inside the unit quarter-circle.

    Draws ``n`` points uniformly from the unit square and returns how many
    satisfy ``x**2 + y**2 <= 1``.
    """
    # Prerequisites are imported inside the UDF itself.
    import numpy as np
    x = np.random.rand(n)
    y = np.random.rand(n)
    return np.count_nonzero(x * x + y * y <= 1.0)
# Instantiate the Knot for monte_pi_count; per the talk, this creates resources on AWS.
knot = ck.Knot(name='pi-calc', func=monte_pi_count)
# 1000 jobs, each drawing 100 million sample points.
n_jobs, n_samples = 1000, 100000000
import numpy as np
# One argument per job: every job receives the same sample count.
args = np.ones(n_jobs, dtype=np.int32) * n_samples
# Submit the jobs, one per element of args.
future = knot.map(args)
# Summarize the status of the submitted jobs (job ID, name, status).
knot.view_jobs()
# Query the result status.
done_yet = future.done()
# Retrieve the result.
res = future.result()
# Or retrieve it through the last entry in the knot's job list.
res = knot.jobs[-1].result() # Equivalent to future.result()
# Module-level slot that the done-callback below overwrites with the estimate.
PI = 0.0


def pi_from_future(future):
    """Estimate pi from the job results in ``future`` and store it in ``PI``.

    Relies on the module-level names ``np``, ``n_samples`` and ``n_jobs``.
    """
    global PI
    # 4 * (points counted inside the quarter-circle) / (total points drawn).
    PI = 4.0 * np.sum(future.result()) / (n_samples * n_jobs)


# Register the callback on the future returned by knot.map().
future.add_done_callback(pi_from_future)
import cloudknot as ck
def awesome_func(...):
...
knot = ck.Knot(func=awesome_func)
import cloudknot as ck
def awesome_func(...):
...
knot = ck.Knot(func=awesome_func)
...
future = knot.map(args)
Solve
with some boundary conditions.
Increase number of constraints.
Increase system size.
⇒
Compare to Dask, Myria, Spark using previous benchmark study.
import cloudknot
cloudknot.Knot()
map()
method
GitHub repo: https://github.com/richford/cloudknot
Documentation: https://richford.github.io/cloudknot/index.html
We welcome issues and contributions!