Spark - Cassandra connector example

less than 1 minute read

GitHub
https://github.com/nsclass/spark-cassandra-example

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/**
 * Minimal example of reading a Cassandra table from Spark in two ways:
 * through the RDD API and through the Dataset/DataFrame API.
 *
 * Both reads target the `system.local` table of the Cassandra node configured
 * via `spark.cassandra.connection.host` and print the `key`, `cluster_name`
 * and `cql_version` columns of every row to standard output.
 *
 * NOTE(review): this object relies on names that must be imported by the
 * enclosing file — `SparkSession` (`org.apache.spark.sql.SparkSession`), the
 * `cassandraTable` extension on `SparkContext` (presumably
 * `com.datastax.spark.connector._`) and the `cassandraFormat` extension on
 * `DataFrameReader` (presumably `org.apache.spark.sql.cassandra._`). Confirm
 * they are present at the top of the file.
 */
object SparkCassandra {

  /**
   * Entry point: builds a local Spark session, dumps the three columns via
   * the RDD API and then via the DataFrame API, and finally stops the session.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("SparkCassandra")
      .master("local")
      .config("spark.cassandra.connection.host", "127.0.0.1")
      .getOrCreate()

    // Always release the session (and its underlying SparkContext), even when
    // one of the reads below throws.
    try {
      // Read data as RDD
      val rdd = spark.sparkContext.cassandraTable(keyspace = "system", table = "local")
        .select("key", "cluster_name", "cql_version")
      println("Data read as RDD")
      rdd.collect()
        .foreach { row =>
          println(row.getString("key"))
          println(row.getString("cluster_name"))
          println(row.getString("cql_version"))
        }

      // Read data as DataSet (DataFrame)
      // Project the same three columns as the RDD read so that only the
      // columns actually printed are loaded.
      val dataset = spark.read
        .cassandraFormat(keyspace = "system", table = "local")
        .load()
        .select("key", "cluster_name", "cql_version")
      dataset.collect()
        .foreach { row =>
          // The type argument is required: without it Scala infers
          // `getAs[Nothing]`, which fails at runtime with a ClassCastException
          // instead of printing the value.
          println(row.getAs[String]("key"))
          println(row.getAs[String]("cluster_name"))
          println(row.getAs[String]("cql_version"))
        }
    } finally {
      spark.stop()
    }
  }
}