ports/sysutils/slurm-wlm/files/patch-etc_slurm.conf.example
Jason W. Bacon 97c163bbef sysutils/slurm-wlm: Switch from hwloc to hwloc2
Also minor improvements to example slurm.conf

PR:             252495
Reported by:    thierry
2021-01-07 20:32:46 +00:00

123 lines
3.8 KiB
Text

--- etc/slurm.conf.example.orig 2020-03-26 21:44:05 UTC
+++ etc/slurm.conf.example
@@ -8,8 +8,9 @@
#
# See the slurm.conf man page for more information.
#
-ClusterName=linux
-ControlMachine=linux0
+ClusterName=Beastie
+# Short hostname of the head node
+ControlMachine=head
#ControlAddr=
#BackupController=
#BackupAddr=
@@ -25,8 +26,8 @@ StateSaveLocation=/var/spool/slurm/ctld
SlurmdSpoolDir=/var/spool/slurm/d
SwitchType=switch/none
MpiDefault=none
-SlurmctldPidFile=/var/run/slurmctld.pid
-SlurmdPidFile=/var/run/slurmd.pid
+SlurmctldPidFile=/var/run/slurm/slurmctld.pid
+SlurmdPidFile=/var/run/slurm/slurmd.pid
ProctrackType=proctrack/pgid
#PluginDir=
#FirstJobId=
@@ -34,7 +35,7 @@ ReturnToService=0
#MaxJobCount=
#PlugStackConfig=
#PropagatePrioProcess=
-#PropagateResourceLimits=
+PropagateResourceLimits=NONE
#PropagateResourceLimitsExcept=
#Prolog=
#Epilog=
@@ -42,7 +43,9 @@ ReturnToService=0
#SrunEpilog=
#TaskProlog=
#TaskEpilog=
-#TaskPlugin=
+TaskPlugin=task/affinity
+TaskPluginParam=cores
+# For debugging: TaskPluginParam=cores,verbose
#TrackWCKey=no
#TreeWidth=50
#TmpFS=
@@ -57,10 +60,11 @@ KillWait=30
Waittime=0
#
# SCHEDULING
+DefMemPerCPU=256
SchedulerType=sched/backfill
#SchedulerAuth=
SelectType=select/cons_tres
-SelectTypeParameters=CR_Core
+SelectTypeParameters=CR_Core_Memory
#PriorityType=priority/multifactor
#PriorityDecayHalfLife=14-0
#PriorityUsageResetPeriod=14-0
@@ -72,22 +76,58 @@ SelectTypeParameters=CR_Core
#
# LOGGING
SlurmctldDebug=info
-SlurmctldLogFile=/var/log/slurmctld.log
+SlurmctldLogFile=/var/log/slurm/slurmctld
SlurmdDebug=info
-SlurmdLogFile=/var/log/slurmd.log
+SlurmdLogFile=/var/log/slurm/slurmd
JobCompType=jobcomp/none
#JobCompLoc=
#
# ACCOUNTING
-#JobAcctGatherType=jobacct_gather/linux
+JobAcctGatherType=jobacct_gather/none
#JobAcctGatherFrequency=30
#
-#AccountingStorageType=accounting_storage/slurmdbd
+AccountingStorageType=accounting_storage/filetxt
+AccountingStorageLoc=/home/slurm/Accounting
+AccountingStoreJobComment=YES
#AccountingStorageHost=
#AccountingStorageLoc=
#AccountingStoragePass=
#AccountingStorageUser=
+
+############################################################################
+# Enable power saving if remote IPMI power-on is available on compute nodes.
+# If unavailable on some nodes, list them in SuspendExcNodes.
+# SlurmUser must be a member of the operator and wheel groups and have a
+# valid login shell in order to execute shutdown on compute nodes.
+# If you prefer to control power manually, see the following scripts
+# from the SPCM port:
#
+# auto-ipmi-remote-power
+# cluster-power-saver
+# cluster-power-waster
+# cluster-ipmi-power-on
+############################################################################
+
+# SuspendProgram=/usr/local/etc/spcm/slurm-node-suspend
+# SuspendTime should be >= SuspendTimeout + ResumeTimeout.
+# SuspendTime=600
+# SuspendTimeout=60
+#
+# ResumeProgram=/usr/local/etc/spcm/slurm-node-resume
+# ResumeTimeout=300
+# BatchStartTimeout=300
+#
+# Exempt compute nodes that double as file servers or don't have IPMI
+# remote power-on enabled.
+#
+# SuspendExcNodes=compute-001
+
+#
# COMPUTE NODES
-NodeName=linux[1-32] Procs=1 State=UNKNOWN
-PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP
+# Set RealMemory (megabytes) below "avail memory" shown in /var/run/dmesg.boot
+# Note that avail memory may change slightly after running freebsd-update
+NodeName=compute-[001-002] Sockets=2 CoresPerSocket=6 RealMemory=30000 State=UNKNOWN
+# NodeName=compute-256g-[001-002] Sockets=2 CoresPerSocket=6 RealMemory=250000 State=UNKNOWN
+# PartitionName=debug Nodes=ALL Default=NO MaxTime=INFINITE State=UP
+PartitionName=batch Nodes=compute-[001-002] Default=YES MaxTime=INFINITE State=UP
+# PartitionName=256g Nodes=compute-256g-[001-002] Default=NO MaxTime=INFINITE State=UP