SparkCLI

yaml
type: "io.kestra.plugin.spark.SparkCLI"
yaml
# Example flow: submit a PySpark job that estimates Pi to a Spark master.
id: spark_cli
namespace: company.team

tasks:
  - id: hello
    type: io.kestra.plugin.spark.SparkCLI
    # Inline script; the spark-submit command below refers to it as pi.py.
    inputFiles:
      pi.py: |
        import sys
        from random import random
        from operator import add
        from pyspark.sql import SparkSession

        if __name__ == "__main__":
            spark = SparkSession.builder.appName("PythonPi").getOrCreate()

            # Optional first CLI argument sets the partition count (default 2).
            partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2
            n = 100000 * partitions

            def f(_: int) -> int:
                # Sample a point in [-1, 1) x [-1, 1); count it as a hit (1)
                # when it falls inside the unit circle, otherwise 0.
                x = random() * 2 - 1
                y = random() * 2 - 1
                return 1 if x ** 2 + y ** 2 <= 1 else 0

            # Sum the hits over n samples; hits / n approximates pi / 4.
            count = spark.sparkContext.parallelize(range(1, n + 1), partitions).map(f).reduce(add)
            print("Pi is roughly %f" % (4.0 * count / n))

            spark.stop()
    containerImage: bitnami/spark
    taskRunner:
      # networkMode is a Docker runner option, so the runner type is stated
      # explicitly rather than left implicit.
      type: io.kestra.plugin.scripts.runner.docker.Docker
      # Host networking, presumably so the container can reach the Spark
      # master addressed as localhost:7077 in the command below.
      networkMode: host
    commands:
      - spark-submit --name Pi --master spark://localhost:7077 pi.py
Properties
SubType string
SubType string
Default bitnami/spark
SubType string
Default true
SubType string
Default ["/bin/sh","-c"]
SubType string
Default AUTO
Possible Values
LINUX, WINDOWS, AUTO
Default 0
SubType string
Default busybox
Default true
SubType string
Default false
Default OVERWRITE
Possible Values
OVERWRITE, FAIL, WARN, IGNORE
SubType string
SubType string
Default ["{{flow.namespace}}"]
Default true
Default { "image": "busybox" }
Default default
Default ALWAYS
Possible Values
IF_NOT_PRESENT, ALWAYS, NEVER
Default true
Validation RegExp \d+\.\d+\.\d+(-[a-zA-Z0-9-]+)?|([a-zA-Z0-9]+)
Default PT5S
Format duration
Default PT1H
Format duration
Default PT10M
Format duration
Default PT5S
Format duration
Default true
Default true
Default false
Validation RegExp \d+\.\d+\.\d+(-[a-zA-Z0-9-]+)?|([a-zA-Z0-9]+)
Default PT1H
Format duration
SubType integer
Default PT5S
Format duration
Default true
Default true
SubType string
Default ["https://www.googleapis.com/auth/cloud-platform"]
Validation RegExp \d+\.\d+\.\d+(-[a-zA-Z0-9-]+)?|([a-zA-Z0-9]+)
Default PT5S
Format duration
Default PT1H
Format duration
Possible Values
ACTION_UNSPECIFIED, RETRY_TASK, FAIL_TASK, UNRECOGNIZED
Default v1
Default RSA
Default https://kubernetes.default.svc
Validation RegExp \d+\.\d+\.\d+(-[a-zA-Z0-9-]+)?|([a-zA-Z0-9]+)
Default PT5S
Format duration
Default true
SubType string
Default e2-medium
Default 2
Minimum >= 0
Maximum <= 10
Default true
SubType string
Default ["https://www.googleapis.com/auth/cloud-platform"]
Validation RegExp \d+\.\d+\.\d+(-[a-zA-Z0-9-]+)?|([a-zA-Z0-9]+)
Default PT5S
Format duration
Default PT1H
Format duration
Min length 1
SubType
SubType string
SubType string
Default IF_NOT_PRESENT
Possible Values
IF_NOT_PRESENT, ALWAYS, NEVER
SubType string
Default true
SubType
SubType string
Default [ "" ]
SubType string
Default VOLUME
Possible Values
MOUNT, VOLUME
Default PT0S
Format duration
SubType string
Default IF_NOT_PRESENT
Possible Values
IF_NOT_PRESENT, ALWAYS, NEVER
Validation RegExp \d+\.\d+\.\d+(-[a-zA-Z0-9-]+)?|([a-zA-Z0-9]+)
SubType string
Default true
SubType array
SubType string
SubType string
Default PT5S
Format duration
Default true
Default { "request": { "memory": "2048", "cpu": "1" } }
Default true
Default PT15M
Format duration
Validation RegExp \d+\.\d+\.\d+(-[a-zA-Z0-9-]+)?|([a-zA-Z0-9]+)
Default PT1H
Format duration