form.yml.erb
# Batch Connect app configuration file
#
# @note Used to define the submitted cluster, title, description, and
#   hard-coded/user-defined attributes that make up this Batch Connect app.
cluster: "quests"
form:
  # What version of Spark/PySpark do you want to use
  - spark_version
  # Any extra user-specific Spark configuration settings
  - spark_configuration_file
  - only_driver_on_root
  # Customize the location of XDG_DATA_HOME so that Jupyter cache products
  # do not go to ~/.local/share/jupyter/, which may cause the home folder to fill up
  - xdg_data_home
  # This allows the user to select which partition they submit to
  - slurm_partition
  # This allows a user to request a GPU
  - gres_value
  # This allows a user to specify the account they are submitting under
  - slurm_account
  # This allows a user to use more than 1 core
  - number_of_workers
  # This allows a user to request RAM
  - memory_per_worker
  # How many hours do you want to run this job for
  - bc_num_hours
  # This allows a user to decide whether they want to run JupyterLab or Jupyter Notebook
  - jupyterlab_switch
  # What folder do you want to serve as the notebook "root" directory
  - notebook_directory_root
  # User can supply an e-mail address if they would like to be e-mailed when the session begins
  - user_email
  # Name of the Slurm job
  - job_name
  # These variables are for the JavaScript
  - raw_data
  - raw_group_data
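# Each item listed under `form:` above is configured under `attributes:` below.
# `bc_num_hours` is a predefined Batch Connect attribute, so only its label,
# help text, and default value are overridden here.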
attributes:
  spark_version:
    widget: select
    label: "PySpark version"
    help: "What version of PySpark would you like to launch?"
    options:
      - [ "Spark 3.5.1, PySpark 2.12.18, MLFlow", "/software/spark/3.5.1/pyspark-notebook_spark-3.5.1.sif" ]
  xdg_data_home:
    label: "Set XDG_DATA_HOME (Optional)"
    value: ""
    help: |
      If you leave this blank, Jupyter will store its intermediate cache data products in `~/.local/share/jupyter/`. Otherwise, specify a full PATH to a folder on Quest. This is important to consider if the default location `~/.local/share/jupyter/` causes your HOME directory to become full.
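  # For example, a project-space directory such as /projects/<allocationID>/jupyter-data
  # could be supplied here; that path is illustrative only, so substitute a directory
  # you own and can write to.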
  spark_configuration_file:
    help: Override defaults with a new configuration file. Leave blank to use defaults.
  only_driver_on_root:
    widget: "check_box"
    label: Only launch the driver on the master node.
    help: |
      This is typically used for `.collect` and `.take` operations that require
      a large amount of memory allocated (> 2GB) for the driver process.
  jupyterlab_switch:
    widget: "check_box"
    label: "Use JupyterLab instead of Jupyter Notebook?"
    help: |
      JupyterLab is the next generation of Jupyter, and is completely compatible with existing Jupyter Notebooks.
  job_name:
    label: "Name of the Job"
    value: "Jupyter Notebook"
  notebook_directory_root:
    label: "Jupyter root directory (Optional)"
    value: ""
    help: |
      If you leave this blank, Jupyter will run from your HOME folder. Otherwise, specify a full PATH to a folder on Quest. This is important to consider when running Jupyter Notebook, as it limits the folders and files that Jupyter can see.
  bc_num_hours:
    label: "Wall Time (in number of hours)"
    help: |
      Select the maximum number of *hours* you want this session/job to last. You can always terminate your session/job early, but it will automatically be terminated by Slurm once the walltime is hit. This will set the value of the `--time` flag for your job. Your available options for walltime are affected by the partition that you have selected. For more information on walltimes, please see the [walltime section](https://services.northwestern.edu/TDClient/30/Portal/KB/ArticleDet?ID=1964#section-walltimes) of our Everything You Need to Know about Using Slurm on Quest page.
    value: 1
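  # This form only collects a number of hours; the translation into a Slurm
  # directive is assumed to happen in the app's submit script (typically
  # submit.yml.erb), e.g. a value of 1 becoming roughly:
  #   #SBATCH --time=01:00:00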
  slurm_partition:
    label: "SLURM Partition"
    help: |
      Select the SLURM partition you will submit this job to. This will set the value of the `-p` or `--partition` flag for your job. This selection will impact a number of other options on this form, including which accounts/allocations and maximum walltimes you can choose and whether or not you have the option to select GPUs. For more information on partitions, please see the [partitions section](https://services.northwestern.edu/TDClient/30/Portal/KB/ArticleDet?ID=1964#section-partitions) of our Everything You Need to Know about Using Slurm on Quest page.
    widget: select
  slurm_account:
    label: "SLURM Account"
    help: |
      The Quest allocation/account under which you will be submitting this job. This will set the value of the `-A` or `--account` flag for your job. *Please note that if you do not see an allocation you expect, one of the following applies:* it is a storage-only allocation; your allocation has expired, in which case please request to renew it using the appropriate form that can be [found here](https://www.it.northwestern.edu/secure/forms/research/allocation-request-forms.html); or the partition that you have selected cannot be submitted to from that allocation. More information on this parameter can be found in the [account section](https://services.northwestern.edu/TDClient/30/Portal/KB/ArticleDet?ID=1964#section-account) of our Everything You Need to Know about Using Slurm on Quest page.
    widget: select
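  # No static `options:` are listed for slurm_partition or slurm_account; the
  # assumption is that their choices are populated client-side by the app's
  # JavaScript from the hidden raw_data / raw_group_data fields defined below.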
  gres_value:
    label: "GPU(s) that you would like to request"
    help: |
      The number and type of GPUs you are requesting for this job, only available for certain partitions. This will set the value of the `--gres` flag for your job. More information on GPUs can be found on the [GPUs on Quest page](https://services.northwestern.edu/TDClient/30/Portal/KB/ArticleDet?ID=1112).
    widget: select
    default: ""
  number_of_workers:
    widget: "number_field"
    label: "Number of CPUs/cores/processors"
    value: 1
    help: |
      This describes how the cores and memory are divvied up on the node
      (*useful to reduce memory allocated for each worker*). Should be a
      multiple of the number of cores on the node you chose above. Do **NOT**
      exceed the number of cores on the node.
    step: 1
    id: 'number_of_workers'
  memory_per_worker:
    widget: "number_field"
    label: "The amount of memory (in GB) each Apache Spark worker can use"
    value: 4
    help: |
      How much total memory/RAM (in GB) you want for this session. This will set the value of the `--mem` flag for your job. Knowing ahead of time how much memory you need can be difficult. That said, you can use the utility `seff` to profile *completed* jobs on Slurm, which gives the maximum memory used by that job. For more information on setting memory, please see the [memory section](https://services.northwestern.edu/TDClient/30/Portal/KB/ArticleDet?ID=1964#section-required-memory) of our Everything You Need to Know about Using Slurm on Quest page.
    min: 1
    max: 243
    step: 1
    id: 'memory_per_worker'
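  # As the help text above notes, a completed job's actual memory usage can be
  # checked from a Quest shell with Slurm's seff utility, e.g.:
  #   seff <jobid>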
  user_email:
    label: "Email Address (Optional)"
    help: |
      Enter your email address if you would like to receive an email when the session starts. Leave blank for no email.
  raw_data:
    label: "Account associations for use by JavaScript"
    widget: hidden_field
    value: "<%= RawSlurmData.read_sinfo_from_file %>"
    cacheable: false
  raw_group_data:
    label: "Linux groups for use by JavaScript"
    widget: hidden_field
    value: "<%= RawSlurmData.rawgroupdata %>"
    cacheable: false
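# RawSlurmData is presumed to be a Ruby helper defined elsewhere in this app;
# judging by the field labels, read_sinfo_from_file and rawgroupdata return
# cached `sinfo` output and the user's Linux groups so the client-side
# JavaScript can filter partitions and accounts. This is an inference, not
# something documented in this file.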