Failed to connect to Vertica from Spark using the Spark Connector over SSL
I am able to get the metadata of the table, but when I try to show the data using "df.show()", it fails and gives the error below:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): java.sql.SQLNonTransientConnectionException: [Vertica]VJDBC Failed to connect to host on port 5433. Reason: Operation timed out
at com.vertica.io.ProtocolStream.(Unknown Source)
at com.vertica.core.VConnection.connect(Unknown Source)
at com.vertica.jdbc.common.BaseConnectionFactory.doConnect(Unknown Source)
at com.vertica.jdbc.common.AbstractDriver.connect(Unknown Source)
at java.sql/java.sql.DriverManager.getConnection(DriverManager.java:677)
at java.sql/java.sql.DriverManager.getConnection(DriverManager.java:189)
at com.vertica.spark.datasource.VerticaDataSourceRDD$.getConnector(VerticaRDD.scala:176)
at com.vertica.spark.datasource.VerticaDataSourceRDD$$anonfun$scanTable$1.apply(VerticaRDD.scala:205)
at com.vertica.spark.datasource.VerticaDataSourceRDD$$anonfun$scanTable$1.apply(VerticaRDD.scala:205)
at com.vertica.spark.datasource.VerticaDataSourceRDD$$anon$1.(VerticaRDD.scala:338)
at com.vertica.spark.datasource.VerticaDataSourceRDD.compute(VerticaRDD.scala:330)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:830)
Caused by: com.vertica.support.exceptions.NonTransientConnectionException: [Vertica]VJDBC Failed to connect to host on port 5433. Reason: Operation timed out
... 33 more
Caused by: java.net.ConnectException: Operation timed out
at java.base/sun.nio.ch.Net.connect0(Native Method)
at java.base/sun.nio.ch.Net.connect(Net.java:493)
at java.base/sun.nio.ch.Net.connect(Net.java:482)
at java.base/sun.nio.ch.NioSocketImpl.connect(NioSocketImpl.java:588)
at java.base/java.net.SocksSocketImpl.connect(SocksSocketImpl.java:339)
at java.base/java.net.Socket.connect(Socket.java:603)
at com.vertica.io.VStream.changeSocket(Unknown Source)
at com.vertica.io.VStream.(Unknown Source)
... 33 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1889)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1877)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1876)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1876)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926)
...
Cause: java.sql.SQLNonTransientConnectionException: [Vertica]VJDBC Failed to connect to host on port 5433. Reason: Operation timed out
at com.vertica.io.ProtocolStream.(Unknown Source)
at com.vertica.core.VConnection.connect(Unknown Source)
at com.vertica.jdbc.common.BaseConnectionFactory.doConnect(Unknown Source)
at com.vertica.jdbc.common.AbstractDriver.connect(Unknown Source)
at java.sql/java.sql.DriverManager.getConnection(DriverManager.java:677)
at java.sql/java.sql.DriverManager.getConnection(DriverManager.java:189)
at com.vertica.spark.datasource.VerticaDataSourceRDD$.getConnector(VerticaRDD.scala:176)
at com.vertica.spark.datasource.VerticaDataSourceRDD$$anonfun$scanTable$1.apply(VerticaRDD.scala:205)
at com.vertica.spark.datasource.VerticaDataSourceRDD$$anonfun$scanTable$1.apply(VerticaRDD.scala:205)
at com.vertica.spark.datasource.VerticaDataSourceRDD$$anon$1.(VerticaRDD.scala:338)
...
Cause: com.vertica.support.exceptions.NonTransientConnectionException: [Vertica]VJDBC Failed to connect to host on port 5433. Reason: Operation timed out
at com.vertica.io.ProtocolStream.(Unknown Source)
at com.vertica.core.VConnection.connect(Unknown Source)
at com.vertica.jdbc.common.BaseConnectionFactory.doConnect(Unknown Source)
at com.vertica.jdbc.common.AbstractDriver.connect(Unknown Source)
at java.sql/java.sql.DriverManager.getConnection(DriverManager.java:677)
at java.sql/java.sql.DriverManager.getConnection(DriverManager.java:189)
at com.vertica.spark.datasource.VerticaDataSourceRDD$.getConnector(VerticaRDD.scala:176)
at com.vertica.spark.datasource.VerticaDataSourceRDD$$anonfun$scanTable$1.apply(VerticaRDD.scala:205)
at com.vertica.spark.datasource.VerticaDataSourceRDD$$anonfun$scanTable$1.apply(VerticaRDD.scala:205)
at com.vertica.spark.datasource.VerticaDataSourceRDD$$anon$1.(VerticaRDD.scala:338)
...
Cause: java.net.ConnectException: Operation timed out
at java.base/sun.nio.ch.Net.connect0(Native Method)
at java.base/sun.nio.ch.Net.connect(Net.java:493)
at java.base/sun.nio.ch.Net.connect(Net.java:482)
at java.base/sun.nio.ch.NioSocketImpl.connect(NioSocketImpl.java:588)
at java.base/java.net.SocksSocketImpl.connect(SocksSocketImpl.java:339)
at java.base/java.net.Socket.connect(Socket.java:603)
at com.vertica.io.VStream.changeSocket(Unknown Source)
at com.vertica.io.VStream.(Unknown Source)
at com.vertica.io.ProtocolStream.(Unknown Source)
at com.vertica.core.VConnection.connect(Unknown Source)
...
Answers
Does a non-SSL connection work when using the Spark connector?
No, it gave an "authentication denied for username" error.
Could you please open a support case? This requires reviewing the logs and the setup.
Okay