spa_rk_db Flashcards
topandas
spark.toPandas()
from pandas
spark.createDataFrame(pandasdf, schema)
read and write
spark. read()
spark. write()
spark.read.jdbc(url=database jdbc url path, table="tablename")
spark.write.mode("overwrite/append").jdbc(url=database jdbc url path, table="tablename")
#csv write #save without changing the schema ??
show
spark.show(no of rows to show as integer)
filter based on condition
df.filter("experiment_id = 1").show()
df. filter((df.col1 == condition) & (df.col2 == condition))
df. filter((df.col1 == condition) & (df.col2 == condition)).select(colname)
replace values on condition
df.replace(old,new)
sparkdf select
sparkdf.select("col name")
select or drop a column by index
df = spark.createDataFrame([[1,2], [3,4]], ['a', 'b'])
n=1
df.select(df.columns[n]).show()
+---+
|  b|
+---+
|  2|
|  4|
+---+
df.drop(df.columns[n]).show()
+---+
|  a|
+---+
|  1|
|  3|
+---+
df.select(df.columns[:n] + df.columns[n+1:]).show()
+---+
|  a|
+---+
|  1|
|  3|
+---+
import pyspark types to construct schema
from pyspark.sql.types import *
run sql query
spark.sql("select * from table --limit 100")
connect to an external database using pyodbc
import pyodbc
Driver='{ODBC Driver 17 for SQL Server}'
Server= sql_server
Database=sql_db
cnxn = pyodbc.connect('DRIVER='+Driver+';SERVER='+Server.split(":")[0]+';DATABASE='+Database+';UID='+sql_user+';PWD='+sql_pass)
cnxn = pyodbc.connect('DRIVER='+Driver+';SERVER='+Server.split(":")[0]+';DATABASE='+Database+';UID='+sql_user+';PWD='+sql_pass)
cursor = cnxn.cursor()
cursor.execute("update Experiment set experimentStatus = 'Completed' where experimentId = {}".format(experiment_id))
cnxn. commit()
get secrets in databricks
dbutils.secrets.get('nlpexplorer-secret-scope', 'sql-username')
first create the key:value pair in key vault
create secrets in databricks
https://docs.microsoft.com/en-us/azure/databricks/security/secrets/secret-scopes
databricks jdbc url
Obtain from database
curl commands for installing sql driver
%sh
curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add -
curl https://packages.microsoft.com/config/ubuntu/16.04/prod.list > /etc/apt/sources.list.d/mssql-release.list
sudo apt-get update
sudo ACCEPT_EULA=Y apt-get -q -y install msodbcsql17