Commit 88740949 authored by John Tourtellott
Browse files

Add option to generate AWS parallel cluster files

In progress
parent 597d0021
__pycache__
......@@ -97,6 +97,7 @@ def ExportCMB(export_op):
scope = ExportScope()
scope.logger = export_op.log()
scope.warning_messages = list()
scope.export_att = export_op.parameters()
scope.sim_atts = smtk.attribute.Resource.CastTo(scope.export_att.find('attributes').value())
if scope.sim_atts is None:
......@@ -293,6 +294,13 @@ def ExportCMB(export_op):
# And any sym link
print('Symlink', scope.symlink)
# Check for AWSSimulation item
aws_item = scope.export_att.find('AWSSimulation')
if aws_item is not None and aws_item.isEnabled():
from internal.writers import aws
reload(aws)
aws_writer = aws.AWSWriter(scope)
completed &= aws_writer.export_ace3p(aws_item)
return completed
......
......@@ -146,6 +146,10 @@
</Structure>
</DiscreteInfo>
</String>
<Directory Name="ProjectFolder" Label="Project Folder" AdvanceLevel="1"
Optional="true" IsEnabledByDefault="false" Version="0">
<BriefDescription>The project root folder on the local filesystem</BriefDescription>
</Directory>
<Group Name="NERSCSimulation" Label="Submit job to NERSC"
Optional="true" IsEnabledByDefault="false"
......@@ -257,8 +261,151 @@
</String>
--> </ItemDefinitions>
</Group>
<Group Name="AWSSimulation" Label="Specify AWS Job (Experimental)" AdvanceLevel="1"
Optional="true" IsEnabledByDefault="false"
Version="1" NumberOfRequiredGroups="1">
<ItemDefinitions>
<String Name="JobName" Label="Job name" Version="0">
<BriefDescription>Label you can use to track your job</BriefDescription>
<DefaultValue>ACE3P</DefaultValue>
</String>
<String Name="JobNotes" Label="Notes" Version="0" MultipleLines="true">
<BriefDescription>Optional notes you want to save with this job</BriefDescription>
<DefaultValue> </DefaultValue>
</String>
<File Name="AWSConfigFile" Label="AWS Config File" ShouldExist="true" Version="0">
<BriefDescription>AWS ParallelCluster config file to use as a template</BriefDescription>
</File>
<String Name="ClusterSection" Label="Cluster Section">
<DefaultValue>cluster default</DefaultValue>
</String>
<String Name="InstanceType" Label="AWS Instance Family">
<ChildrenDefinitions>
<String Name="c4" Label="Instance Type">
<DiscreteInfo>
<Value Enum="c4.large (2 vCPU, 3.75 GB, 500 Mbps)">c4.large</Value>
<Value Enum="c4.xlarge (4 vCPU, 7.5 GB, 750 Mbps)">c4.xlarge</Value>
<Value Enum="c4.2xlarge (8 vCPU, 15 GB, 1000 Mbps)">c4.2xlarge</Value>
<Value Enum="c4.4xlarge (16 vCPU, 30 GB, 2000 Mbps)">c4.4xlarge</Value>
<Value Enum="c4.8xlarge (36 vCPU, 60 GB, 4000 Mbps)">c4.8xlarge</Value>
</DiscreteInfo>
</String>
<String Name="c5" Label="Instance Type">
<DiscreteInfo>
<Value Enum="c5.large (2 vCPU, 4 GB, Up to 10 Gbps)">c5.large</Value>
<Value Enum="c5.xlarge (4 vCPU, 8 GB, Up to 10 Gbps)">c5.xlarge</Value>
<Value Enum="c5.2xlarge (8 vCPU, 16 GB, Up to 10 Gbps)">c5.2xlarge</Value>
<Value Enum="c5.4xlarge (16 vCPU, 32 GB, Up to 10 Gbps)">c5.4xlarge</Value>
<Value Enum="c5.9xlarge (36 vCPU, 72 GB, 10 Gbps)">c5.9xlarge</Value>
<Value Enum="c5.12xlarge (48 vCPU, 96 GB, 12 Gbps)">c5.12xlarge</Value>
<Value Enum="c5.18xlarge (72 vCPU, 144 GB, 25 Gbps)">c5.18xlarge</Value>
<Value Enum="c5.24xlarge (96 vCPU, 192 GB, 25 Gbps)">c5.24xlarge</Value>
</DiscreteInfo>
</String>
<String Name="c5n" Label="Instance Type">
<ChildrenDefinitions>
<Void Name="ElasticFabricAdapter" Label="Use Elastic Fabric Adapter (EFA)"
Optional="true" IsEnabledByDefault="false" />
</ChildrenDefinitions>
<DiscreteInfo>
<Value Enum="c5n.large (2 vCPU, 5.25 GB, Up to 25 Gbps)">c5n.large</Value>
<Value Enum="c5n.xlarge (4 vCPU, 10.5 GB, Up to 25 Gbps)">c5n.xlarge</Value>
<Value Enum="c5n.2xlarge (8 vCPU, 21 GB, Up to 25 Gbps)">c5n.2xlarge</Value>
<Value Enum="c5n.4xlarge (16 vCPU, 42 GB, Up to 25 Gbps)">c5n.4xlarge</Value>
<Value Enum="c5n.9xlarge (36 vCPU, 96 GB, 50 Gbps)">c5n.9xlarge</Value>
<Structure>
<Value Enum="c5n.18xlarge (72 vCPU, 192 GB, 100 Gbps)">c5n.18xlarge</Value>
<Items>
<Item>ElasticFabricAdapter</Item>
</Items>
</Structure>
</DiscreteInfo>
</String>
<String Name="t2" Label="Instance Type">
<DiscreteInfo>
<Value Enum="t2.micro (1 vCPU, 1 GB)">t2.micro</Value>
<Value Enum="t2.large (2 vCPU, 8 GB)">t2.large</Value>
<Value Enum="t2.xlarge (4 vCPU, 16GB)">t2.xlarge</Value>
<Value Enum="t2.2xlarge (8 vCPU, 32GB)">t2.2xlarge</Value>
</DiscreteInfo>
</String>
<String Name="t3" Label="Instance Type">
<DiscreteInfo>
<Value Enum="t3.large (2 vCPU, 8 GB)">t3.large</Value>
<Value Enum="t3.xlarge (4 vCPU, 16GB)">t3.xlarge</Value>
<Value Enum="t3.2xlarge (8 vCPU, 32GB)">t3.2xlarge</Value>
</DiscreteInfo>
</String>
<String Name="t3a" Label="Instance Type">
<DiscreteInfo>
<Value Enum="t3a.large (2 vCPU, 8 GB)">t3a.large</Value>
<Value Enum="t3a.xlarge (4 vCPU, 16GB)">t3a.xlarge</Value>
<Value Enum="t3a.2xlarge (8 vCPU, 32GB)">t3a.2xlarge</Value>
</DiscreteInfo>
</String>
<String Name="OtherType" Label="Other" />
</ChildrenDefinitions>
<DiscreteInfo>
<Structure>
<Value>c4</Value>
<Items>
<Item>c4</Item>
</Items>
</Structure>
<Structure>
<Value>c5</Value>
<Items>
<Item>c5</Item>
</Items>
</Structure>
<Structure>
<Value>c5n</Value>
<Items>
<Item>c5n</Item>
</Items>
</Structure>
<Structure>
<Value>t2</Value>
<Items>
<Item>t2</Item>
</Items>
</Structure>
<Structure>
<Value>t3</Value>
<Items>
<Item>t3</Item>
</Items>
</Structure>
<Structure>
<Value>t3a</Value>
<Items>
<Item>t3a</Item>
</Items>
</Structure>
<Structure>
<Value>Other</Value>
<Items>
<Item>OtherType</Item>
</Items>
</Structure>
</DiscreteInfo>
</String>
<Int Name="NumberOfNodes" Label="Number of nodes" Version="0">
<DefaultValue>1</DefaultValue>
<RangeInfo><Min Inclusive="true">1</Min></RangeInfo>
</Int>
<Int Name="Timeout" Label="Time limit" Units="min" Version="0">
<DefaultValue>5</DefaultValue>
<RangeInfo><Min Inclusive="true">1</Min></RangeInfo>
</Int>
</ItemDefinitions>
</Group>
</ItemDefinitions>
</AttDef>
</Definitions>
<Views>
<View Type="Instanced" Title="Export Settings" TopLevel="true" FilterByCategory="false" FilterByAdvanceLevel="true">
......
#=============================================================================
#
# Copyright (c) Kitware, Inc.
# All rights reserved.
# See LICENSE.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even
# the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
# PURPOSE. See the above copyright notice for more information.
#
#=============================================================================
import configparser
import datetime
import os
import warnings
print('loading', os.path.basename(__file__))
import smtk
import smtk.attribute
import smtk.io
# import smtk.model
# ---------------------------------------------------------------------
class AWSWriter:
    """Generate a local job folder for running ACE3P on AWS ParallelCluster.

    The writer reads the "AWSSimulation" group of the export attribute held
    by ``scope`` and produces, under ``<ProjectFolder>/jobs/aws-<timestamp>``:

    * ``files_to_upload.txt``    - one path per line from ``scope.files_to_upload``
    * ``parallelcluster.config`` - the template config with the compute
      instance type replaced
    * ``export.sbi``             - the export attribute resource
    * ``stamp/export``           - an empty marker file
    """

    # Model file extensions that cannot be used for AWS jobs (Genesis/Exodus).
    _UNSUPPORTED_MODEL_EXTENSIONS = ('.gen', '.exo', '.ex2')

    def __init__(self, scope):
        """Store the export scope after checking the files to upload.

        :param scope: export scope object; must provide ``files_to_upload``
            (iterable of path strings). ``export_att`` is read later by
            :meth:`export_ace3p`.
        :raises RuntimeError: if any file to upload is a Genesis/Exodus model.
        """
        # Todo Check OS? (only support linux?)

        # Check for genesis/exodus model files, which aren't supported
        for path in scope.files_to_upload:
            ext = os.path.splitext(path)[1]
            if ext.lower() in self._UNSUPPORTED_MODEL_EXTENSIONS:
                self._fail(
                    'AWS does not support Genesis/Exodus models: {}'.format(ext))

        self.aws_item = None
        self.scope = scope

    def export_ace3p(self, aws_item):
        """Write the AWS job folder for the current export.

        All inputs are validated before anything is written to disk.

        :param aws_item: the enabled "AWSSimulation" group item; stored on
            ``self.aws_item``. The individual settings are looked up by path
            on ``self.scope.export_att``.
        :returns: ``True`` when the job folder has been written.
        :raises RuntimeError: if the project folder, config file, cluster
            section, or instance type is missing or invalid, or if the export
            attributes cannot be written.
        """
        self.aws_item = aws_item

        project_folder = self._get_project_folder()
        config = self._build_cluster_config()

        # Todo Generate arguments for aws submit script, using:
        #   - NumberOfNodes item
        #   - Timeout (min) item

        # Create job folder, named by local timestamp
        folder_name = datetime.datetime.now().strftime('aws-%y%m%d-%H%M%S')
        folder_path = os.path.join(project_folder, 'jobs', folder_name)
        os.makedirs(folder_path)

        self._write_upload_list(folder_path)

        # Todo copy the ace3p input file (?)

        # Write the config file
        config_path = os.path.join(folder_path, 'parallelcluster.config')
        with open(config_path, 'w') as config_file:
            config.write(config_file)

        self._write_export_attributes(folder_path)
        self._write_stamp(folder_path)

        print('Created job folder {}'.format(folder_path))
        return True

    @staticmethod
    def _fail(msg):
        """Print ``msg`` and raise it as a RuntimeError."""
        print(msg)
        raise RuntimeError(msg)

    def _get_project_folder(self):
        """Return the ProjectFolder value, or fail if it is not specified."""
        item = self.scope.export_att.findDirectory('ProjectFolder')
        if item is None or not item.isEnabled() or not item.isSet():
            self._fail('AWSWriter Error: ProjectFolder is not specified')
        return item.value()

    def _build_cluster_config(self):
        """Load the template config file and set the compute instance type."""
        export_att = self.scope.export_att

        # Generate cluster configuration, starting with specified default file
        config_file_item = export_att.itemAtPath('AWSSimulation/AWSConfigFile')
        if config_file_item is None or not config_file_item.isSet():
            self._fail('AWSWriter ERROR: AWS config file not specified')
        config_file_path = config_file_item.value()
        if not os.path.exists(config_file_path):
            self._fail(
                'AWSWriter ERROR: AWS config file not found: {}'.format(
                    config_file_path))

        # Get the cluster section to use
        cluster_key_item = export_att.itemAtPath('AWSSimulation/ClusterSection')
        if cluster_key_item is None:
            self._fail('AWSWriter ERROR: cluster section not specified')
        cluster_key = cluster_key_item.value()

        # Load config file; the context manager closes the template afterwards
        config = configparser.ConfigParser()
        with open(config_file_path) as template_file:
            config.read_file(template_file)
        if cluster_key not in config:
            self._fail(
                'AWSWriter ERROR: section "{}" not found in AWS config file: {}'
                .format(cluster_key, config_file_path))

        # The selected instance family must expose exactly one active child,
        # whose value is the concrete instance type.
        instance_type_item = export_att.itemAtPath('AWSSimulation/InstanceType')
        num_active = instance_type_item.numberOfActiveChildrenItems()
        if num_active != 1:
            self._fail(
                'AWSWriter ERROR: instance type should have 1 active child '
                'item, not {}'.format(num_active))
        instance_item = instance_type_item.activeChildItem(0)
        config[cluster_key]['compute_instance_type'] = instance_item.value()
        # Todo change master instance type to a compute node?
        # config[cluster_key]['master_instance_type'] = instance_item.value()

        return config

    def _write_upload_list(self, folder_path):
        """Write files_to_upload.txt with one path per line."""
        upload_info_path = os.path.join(folder_path, 'files_to_upload.txt')
        with open(upload_info_path, 'w') as upload_info_file:
            for path in self.scope.files_to_upload:
                upload_info_file.write(path)
                upload_info_file.write('\n')

    def _write_export_attributes(self, folder_path):
        """Write the export attribute resource to export.sbi."""
        att_writer = smtk.io.AttributeWriter()
        logger = smtk.io.Logger()
        export_atts_path = os.path.join(folder_path, 'export.sbi')
        # A truthy return value from write() is treated as failure.
        err = att_writer.write(
            self.scope.export_att.attributeResource(), export_atts_path, logger)
        if err:
            self._fail(
                'AWSWriter Error: Unable to write export attribute file {}'
                .format(export_atts_path))

    @staticmethod
    def _write_stamp(folder_path):
        """Create the stamp subdirectory and touch its "export" file."""
        stamp_path = os.path.join(folder_path, 'stamp')
        os.makedirs(stamp_path)
        export_path = os.path.join(stamp_path, 'export')
        with open(export_path, 'w'):
            pass
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment