{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### A test of the performance prediction model" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from collections import OrderedDict as odict\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import matplotlib.cm as cm" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "#(hardware name, number of nodes)\n", "files = odict({})\n", "files['i5'] = ('i5',1)\n", "files['gtx1060'] = ('gtx1060',1)\n", "files['skl_mpi1'] = ('skl',1)\n", "files['skl_mpi2'] = ('skl',2)\n", "files['skl_mpi4'] = ('skl',4)\n", "files['knl_mpi1'] = ('knl',1)\n", "files['knl_mpi2'] = ('knl',2)\n", "files['knl_mpi4'] = ('knl',4)\n", "files['p100nv_mpi1'] = ('p100',1)\n", "files['p100nv_mpi2'] = ('p100',2)\n", "files['p100nv_mpi4'] = ('p100',4)\n", "files['v100nv_mpi1'] = ('v100',1)\n", "files['v100nv_mpi2'] = ('v100',2)\n", "files['v100nv_mpi4'] = ('v100',4)\n", "# order by number of nodes to make labeling easier further down\n", "files=odict(sorted(files.items(), key= lambda t : t[1][1]))\n", "# count the number of single-node entries in the dict\n", "number=0\n", "for k,v in files.items(): \n", " if v[1]==1: number+=1\n", "# set up plotting specifications\n", "arch = {'knl':(cm.Greens, 450,0.5,0.33),'skl':(cm.Greys,200,0.5,0.75), 'p100':(cm.Blues, 550,0.5,0.43),\n", " 'v100':(cm.Purples,850,0.5,0.85), 'i5':(cm.Wistia,30,0.5,0.79),'gtx1060':(cm.Oranges,155,0.5,0.70)}\n", "intens={1:0.8, 2:0.6, 4:0.4}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here, we set up the prediction model by giving the number of function calls and memory operations for each of the three types of primitive functions: axpby, dot and dxdy" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "#(axpby,dot,dxdy)\n", "latencies = odict()\n", "latencies['scal'] = (1,0,0)\n", "latencies['axpby'] = 
(1,0,0)\n", "latencies['pointwiseDot'] = (1,0,0)\n", "latencies['dot'] = (0,1,0)\n", "latencies['dx'] = (0,0,1)\n", "latencies['dy'] = (0,0,1)\n", "latencies['arakawa'] = (3,0,6) # N = 9\n", "latencies['cg'] = (6,2,6) # N = 14\n", "latencies['avg']= (9,2,12) # N=23\n", "memops = odict()\n", "memops['scal']= (2,0,0)\n", "memops['axpby']= (3,0,0)\n", "memops['pointwiseDot']= (6,0,0)\n", "memops['dot']= (0,2,0)\n", "memops['dx']= (0,0,3)\n", "memops['dy']= (0,0,3)\n", "memops['arakawa'] = (16,0,18) # M = 34 -> M/N = 3.78\n", "memops['cg'] = (20,4,18) # M = 42 -> M/N = 3.00\n", "memops['avg'] = (36,4,36) # M = 76 -> M/N = 3.30" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let us read in the previously measured bandwidths and latencies" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | axpby_bw | \n", "axpby_bw_err | \n", "dot_bw | \n", "dot_bw_err | \n", "dxdy2_bw | \n", "dxdy2_bw_err | \n", "dxdy3_bw | \n", "dxdy3_bw_err | \n", "dxdy4_bw | \n", "dxdy4_bw_err | \n", "... | \n", "axpby_lat_dist | \n", "axpby_lat_dist_err | \n", "dot_lat_shared | \n", "dot_lat_shared_err | \n", "dot_lat_dist | \n", "dot_lat_dist_err | \n", "dxdy_lat_shared | \n", "dxdy_lat_shared_err | \n", "dxdy_lat_dist | \n", "dxdy_lat_dist_err | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
i5 | \n", "29.99 | \n", "0.19 | \n", "9.31 | \n", "0.04 | \n", "27.79 | \n", "2.97 | \n", "29.12 | \n", "2.84 | \n", "25.58 | \n", "1.49 | \n", "... | \n", "nan | \n", "nan | \n", "4.76 | \n", "0.23 | \n", "nan | \n", "nan | \n", "0.00 | \n", "1.44 | \n", "nan | \n", "nan | \n", "
gtx1060 | \n", "157.05 | \n", "0.06 | \n", "26.50 | \n", "0.10 | \n", "130.63 | \n", "0.40 | \n", "111.23 | \n", "1.11 | \n", "83.82 | \n", "13.83 | \n", "... | \n", "nan | \n", "nan | \n", "92.06 | \n", "8.70 | \n", "nan | \n", "nan | \n", "0.00 | \n", "0.82 | \n", "nan | \n", "nan | \n", "
skl | \n", "206.71 | \n", "5.87 | \n", "192.05 | \n", "18.31 | \n", "181.56 | \n", "35.38 | \n", "161.75 | \n", "13.00 | \n", "118.06 | \n", "18.39 | \n", "... | \n", "0.00 | \n", "0.26 | \n", "17.28 | \n", "2.32 | \n", "37.93 | \n", "4.14 | \n", "22.70 | \n", "2.11 | \n", "28.52 | \n", "2.10 | \n", "
knl | \n", "393.15 | \n", "22.19 | \n", "141.36 | \n", "6.63 | \n", "239.04 | \n", "17.02 | \n", "172.69 | \n", "26.80 | \n", "126.04 | \n", "18.59 | \n", "... | \n", "9.16 | \n", "0.09 | \n", "54.83 | \n", "1.79 | \n", "119.59 | \n", "5.14 | \n", "9.93 | \n", "0.70 | \n", "52.67 | \n", "3.72 | \n", "
p100 | \n", "550.51 | \n", "1.23 | \n", "375.61 | \n", "1.94 | \n", "293.25 | \n", "7.11 | \n", "238.99 | \n", "12.63 | \n", "208.44 | \n", "7.05 | \n", "... | \n", "0.00 | \n", "0.27 | \n", "50.89 | \n", "7.06 | \n", "51.67 | \n", "0.59 | \n", "26.23 | \n", "0.05 | \n", "54.40 | \n", "0.35 | \n", "
titanXp | \n", "431.24 | \n", "3.45 | \n", "61.37 | \n", "0.12 | \n", "372.85 | \n", "4.16 | \n", "308.92 | \n", "9.47 | \n", "246.73 | \n", "7.92 | \n", "... | \n", "nan | \n", "nan | \n", "44.37 | \n", "5.15 | \n", "nan | \n", "nan | \n", "2.38 | \n", "0.57 | \n", "nan | \n", "nan | \n", "
v100 | \n", "846.42 | \n", "0.95 | \n", "610.15 | \n", "5.99 | \n", "794.43 | \n", "20.52 | \n", "735.42 | \n", "33.02 | \n", "696.49 | \n", "15.14 | \n", "... | \n", "0.00 | \n", "0.31 | \n", "88.49 | \n", "4.68 | \n", "97.58 | \n", "0.79 | \n", "4.20 | \n", "0.02 | \n", "37.19 | \n", "0.42 | \n", "
7 rows × 24 columns
\n", "\n", " | B(P=2) [GB/s] | \n", "B(P=3) [GB/s] | \n", "B(P=4) [GB/s] | \n", "B(P=5) [GB/s] | \n", "$T_{lat}(1)$ [$\\mu$s] | \n", "$T_{lat}(4)$ [$\\mu$s] | \n", "
---|---|---|---|---|---|---|
i5 | \n", "26 $\\pm$ 02 | \n", "27 $\\pm$ 02 | \n", "26 $\\pm$ 01 | \n", "23 $\\pm$ 02 | \n", "01 $\\pm$ 01 | \n", "n/a | \n", "
gtx1060 | \n", "116 $\\pm$ 01 | \n", "108 $\\pm$ 01 | \n", "94 $\\pm$ 09 | \n", "85 $\\pm$ 12 | \n", "09 $\\pm$ 01 | \n", "n/a | \n", "
skl | \n", "194 $\\pm$ 20 | \n", "183 $\\pm$ 09 | \n", "153 $\\pm$ 15 | \n", "147 $\\pm$ 07 | \n", "14 $\\pm$ 02 | \n", "19 $\\pm$ 02 | \n", "
knl | \n", "281 $\\pm$ 13 | \n", "232 $\\pm$ 24 | \n", "188 $\\pm$ 20 | \n", "160 $\\pm$ 18 | \n", "13 $\\pm$ 01 | \n", "42 $\\pm$ 02 | \n", "
titanXp | \n", "310 $\\pm$ 02 | \n", "287 $\\pm$ 04 | \n", "259 $\\pm$ 05 | \n", "230 $\\pm$ 21 | \n", "06 $\\pm$ 01 | \n", "n/a | \n", "
p100 | \n", "383 $\\pm$ 06 | \n", "336 $\\pm$ 12 | \n", "306 $\\pm$ 08 | \n", "267 $\\pm$ 21 | \n", "22 $\\pm$ 01 | \n", "33 $\\pm$ 01 | \n", "
v100 | \n", "806 $\\pm$ 11 | \n", "776 $\\pm$ 18 | \n", "755 $\\pm$ 09 | \n", "691 $\\pm$ 43 | \n", "11 $\\pm$ 01 | \n", "28 $\\pm$ 01 | \n", "