val configStrs = """ "spark.sql.catalogImplementation" = "hive", "spark.hadoop.javax.jdo.option.ConnectionDriverName" = "org.apache.derby.jdbc.EmbeddedDriver", "spark.hadoop.javax.jdo.option.ConnectionURL" = "jdbc:derby:memory:myInMemDB;create=true", "spark.hadoop.javax.jdo.option.ConnectionUserName" = "hiveuser", "spark.hadoop.javax.jdo.option.ConnectionPassword" = "hivepass", "spark.hadoop.datanucleus.autoCreateSchema" = "true", "spark.hadoop.datanucleus.autoCreateTables" = "true", "spark.hadoop.datanucleus.fixedDatastore" = "false", """ val initScriptContent = s""" |#!/bin/bash | |cat << 'EOF' > /databricks/driver/conf/00-custom-spark-driver-embedded-metastore.conf |[driver] { |$configStrs |} |EOF |cat > /databricks/common/conf/disable-metastore-monitor.conf << EOL |{ | databricks.daemon.driver.enableMetastoreMonitor = false, | databricks.daemon.driver.enableMetastoreHealthCheck = false, |} |EOL """.stripMargin dbutils.fs.put("dbfs:/databricks/init_scripts/set_spark_embedded_metastore.sh",initScriptContent, true)
Wrote 838 bytes.
configStrs: String =
"
"spark.sql.catalogImplementation" = "hive",
"spark.hadoop.javax.jdo.option.ConnectionDriverName" = "org.apache.derby.jdbc.EmbeddedDriver",
"spark.hadoop.javax.jdo.option.ConnectionURL" = "jdbc:derby:memory:myInMemDB;create=true",
"spark.hadoop.javax.jdo.option.ConnectionUserName" = "hiveuser",
"spark.hadoop.javax.jdo.option.ConnectionPassword" = "hivepass",
"spark.hadoop.datanucleus.autoCreateSchema" = "true",
"spark.hadoop.datanucleus.autoCreateTables" = "true",
"spark.hadoop.datanucleus.fixedDatastore" = "false",
"
initScriptContent: String =
"
#!/bin/bash
cat << 'EOF' > /databricks/driver/conf/00-custom-spark-driver-embedded-metastore.conf
[driver] {
"spark.sql.catalogImplementation" = "hive",
"spark.hadoop.javax.jdo.option.ConnectionDriverName" = "org.apache.derby.jdbc.EmbeddedDriver",
"spark.hadoop.javax.jdo.option.ConnectionURL" = "jdbc:derby:memory:myInMemDB;create=true",
"spark.hadoop.javax.jdo.option.ConnectionUserName" = "hiveuser",
"spark.hadoop.javax.jdo.option.ConnectionPassword" = "hivepass",
"spark.hadoop.datanucleus.autoCreateSchema" = "true",
"spark.hadoop.datanucleus.autoCreateTables" = "true",
"spark.hadoop.datanucleus.fixedDatastore" = "false",
}
EOF
cat > /databricks/common/conf/disable-metastore-monitor.conf << EOL
{
databricks.daemon.driver.enableMetastoreMonitor = false,
databricks.daemon.driver.enableMetastoreHealthCheck = false,
}
EOL
"
res0: Boolean = true
dbutils.fs.head("dbfs:/databricks/init_scripts/set_spark_embedded_metastore.sh")
res2: String =
"
#!/bin/bash
cat << 'EOF' > /databricks/driver/conf/00-custom-spark-driver-embedded-metastore.conf
[driver] {
"spark.sql.catalogImplementation" = "hive",
"spark.hadoop.javax.jdo.option.ConnectionDriverName" = "org.apache.derby.jdbc.EmbeddedDriver",
"spark.hadoop.javax.jdo.option.ConnectionURL" = "jdbc:derby:memory:myInMemDB;create=true",
"spark.hadoop.javax.jdo.option.ConnectionUserName" = "hiveuser",
"spark.hadoop.javax.jdo.option.ConnectionPassword" = "hivepass",
"spark.hadoop.datanucleus.autoCreateSchema" = "true",
"spark.hadoop.datanucleus.autoCreateTables" = "true",
"spark.hadoop.datanucleus.fixedDatastore" = "false",
}
EOF "
sc.getConf.getAll.toSeq
  .filter(x =>
    x._1.contains("spark.sql.catalogImplementation") ||
    x._1.contains("spark.hadoop.javax") ||
    x._1.contains("datanucleus"))
  .foreach(println)

/** In theory, it should print the configs that we specified:
 *  (spark.hadoop.datanucleus.fixedDatastore,false)
 *  (spark.hadoop.javax.jdo.option.ConnectionDriverName,org.apache.derby.jdbc.EmbeddedDriver)
 *  (spark.hadoop.javax.jdo.option.ConnectionPassword,hivepass)
 *  (spark.hadoop.datanucleus.autoCreateTables,true)
 *  (spark.hadoop.javax.jdo.option.ConnectionURL,jdbc:derby:memory:myInMemDB;create=true)
 *  (spark.sql.catalogImplementation,hive)
 *  (spark.hadoop.datanucleus.autoCreateSchema,true)
 *  (spark.hadoop.javax.jdo.option.ConnectionUserName,hiveuser)
 */
(spark.hadoop.datanucleus.fixedDatastore,false)
(spark.hadoop.javax.jdo.option.ConnectionDriverName,org.apache.derby.jdbc.EmbeddedDriver)
(spark.hadoop.javax.jdo.option.ConnectionPassword,hivepass)
(spark.hadoop.datanucleus.autoCreateTables,true)
(spark.hadoop.javax.jdo.option.ConnectionURL,jdbc:derby:memory:myInMemDB;create=true)
(spark.sql.catalogImplementation,hive)
(spark.hadoop.datanucleus.autoCreateSchema,true)
(spark.hadoop.javax.jdo.option.ConnectionUserName,hiveuser)
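Beyond inspecting the confs, a quick smoke test can confirm the embedded metastore actually serves catalog calls. This is a minimal sketch under the setup above; the table name smoke_test is illustrative, and its metadata lives only in the in-memory Derby instance, so it disappears when the driver restarts.

// Create a table; its metadata goes to the in-memory Derby metastore.
spark.sql("CREATE TABLE IF NOT EXISTS smoke_test (id INT) USING parquet")

// The table should be visible through the catalog backed by the embedded metastore.
spark.sql("SHOW TABLES").show(false)

// Clean up (the metadata would be lost on driver restart regardless).
spark.sql("DROP TABLE IF EXISTS smoke_test")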
import java.util.concurrent.{CountDownLatch, Executors, TimeUnit}
import java.sql.Timestamp
import java.util.Date
import java.util.concurrent.atomic.AtomicInteger
import scala.collection.parallel.ForkJoinTaskSupport
import scala.sys.process._
import org.apache.spark.sql.hive.HiveExternalCatalog
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.client.PoolingHiveClient
import org.apache.spark.sql.catalyst.catalog.CatalogTablePartition
import org.apache.spark.util.{SerializableConfiguration, ThreadUtils, Utils}
import com.databricks.backend.common.util.Project
import com.databricks.backend.daemon.driver.DriverConf
import com.databricks.conf.trusted.ProjectConf
import com.databricks.sql.DatabricksStaticSQLConf

// Get the port of the metastore RDS connection from the Databricks conf:
val dbConf = new DriverConf(ProjectConf.loadLocalConfig(Project.Driver))
val port = dbConf.internalMetastorePort
import java.util.concurrent.{CountDownLatch, Executors, TimeUnit}
import java.sql.Timestamp
import java.util.Date
import java.util.concurrent.atomic.AtomicInteger
import scala.collection.parallel.ForkJoinTaskSupport
import scala.sys.process._
import org.apache.spark.sql.hive.HiveExternalCatalog
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.client.PoolingHiveClient
import org.apache.spark.sql.catalyst.catalog.CatalogTablePartition
import org.apache.spark.util.{SerializableConfiguration, ThreadUtils, Utils}
import com.databricks.backend.common.util.Project
import com.databricks.backend.daemon.driver.DriverConf
import com.databricks.conf.trusted.ProjectConf
import com.databricks.sql.DatabricksStaticSQLConf
dbConf: com.databricks.backend.daemon.driver.DriverConf = com.databricks.backend.daemon.driver.DriverConf(driver)
port: Int = 3306
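With the port in hand (and scala.sys.process already imported), one way to see whether anything on the driver still talks to the default metastore RDS is to count established TCP connections to that port. This is a hedged sketch: it assumes the netstat binary is available on the driver node, and metastoreConnectionCount is a hypothetical helper, not a Databricks API.

// Hypothetical helper: count established TCP connections to a given port
// by parsing netstat output (assumes netstat exists on the driver node).
def metastoreConnectionCount(p: Int): Int =
  "netstat -ant".!!
    .split("\n")
    .count(line => line.contains(s":$p") && line.contains("ESTABLISHED"))

println(s"Established connections to metastore port $port: ${metastoreConnectionCount(port)}")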
1. Check whether a cluster-scoped init script with the same name already exists
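One way to check is to list the DBFS init-scripts directory used above. A sketch; this only catches name clashes under that directory, not scripts configured from other locations:

// Look for a file with the same name in the init-scripts directory.
val scriptName = "set_spark_embedded_metastore.sh"
val alreadyExists = dbutils.fs.ls("dbfs:/databricks/init_scripts/")
  .exists(_.name == scriptName)
println(s"$scriptName already present: $alreadyExists")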