In [1]:
!pip install pyspark
Defaulting to user installation because normal site-packages is not writeable
Collecting pyspark
  Downloading pyspark-3.5.1.tar.gz (317.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 317.0/317.0 MB 10.3 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Collecting py4j==0.10.9.7 (from pyspark)
  Downloading py4j-0.10.9.7-py2.py3-none-any.whl.metadata (1.5 kB)
Downloading py4j-0.10.9.7-py2.py3-none-any.whl (200 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 200.5/200.5 kB 7.6 MB/s eta 0:00:00
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... done
  Created wheel for pyspark: filename=pyspark-3.5.1-py2.py3-none-any.whl size=317488513 sha256=c36ce95f694a508d99b256477e37403d6e161f1bbc817f12d1cae31ef64fe9c3
  Stored in directory: /home/mohsen/.cache/pip/wheels/80/1d/60/2c256ed38dddce2fdd93be545214a63e02fbd8d74fb0b7f3a6
Successfully built pyspark
Installing collected packages: py4j, pyspark
Successfully installed py4j-0.10.9.7 pyspark-3.5.1
In [2]:
import pyspark
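A quick sanity check after the import is to print the installed version; this is a minimal sketch that only reads package metadata and does not start a Spark session.

import pyspark
print(pyspark.__version__)  # expected to print 3.5.1, matching the wheel built above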
In [1]:
import subprocess
def check_java_version():
    try:
        # Run `java -version`; the version banner is written to stderr,
        # so redirect stderr into the captured output.
        java_version = subprocess.check_output(["java", "-version"], stderr=subprocess.STDOUT)
        print(java_version.decode("utf-8"))
    except subprocess.CalledProcessError as e:
        print("Error checking Java version:", e.output.decode("utf-8"))

check_java_version()
openjdk version "11.0.22" 2024-01-16
OpenJDK Runtime Environment (build 11.0.22+7-post-Ubuntu-0ubuntu222.04.1)
OpenJDK 64-Bit Server VM (build 11.0.22+7-post-Ubuntu-0ubuntu222.04.1, mixed mode, sharing)
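If several JDKs are installed, PySpark picks up the one pointed to by JAVA_HOME. Below is a minimal sketch of pinning it from Python before the session is created; the JDK path is an assumption for this machine, and SPARK_LOCAL_IP is optional (it addresses the loopback-address warning that appears in the next cell's output).

import os

# Hypothetical path; adjust to wherever your JDK 11+ is actually installed.
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-11-openjdk-amd64"
# Optional: bind Spark to an explicit address instead of letting it guess.
os.environ["SPARK_LOCAL_IP"] = "127.0.0.1"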
In [2]:
from pyspark.sql import SparkSession
# Initialize Spark Session
spark = SparkSession.builder.appName("TestPySpark").getOrCreate()
# Create a DataFrame
df = spark.createDataFrame([(1, "Alice"), (2, "Bob"), (3, "Cathy")], ["id", "name"])
# Show the DataFrame
df.show()
# Stop the Spark Session
spark.stop()
24/05/29 20:30:50 WARN Utils: Your hostname, AlienDVD resolves to a loopback address: 127.0.1.1; using 172.25.214.222 instead (on interface eth0)
24/05/29 20:30:50 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/05/29 20:30:52 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+---+-----+
| id| name|
+---+-----+
|  1|Alice|
|  2|  Bob|
|  3|Cathy|
+---+-----+
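Beyond show(), the same local session supports the usual DataFrame transformations. A small sketch with the same toy data, run before stopping the session (the derived column name is just an example):

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.appName("TestPySpark").getOrCreate()
df = spark.createDataFrame([(1, "Alice"), (2, "Bob"), (3, "Cathy")], ["id", "name"])

# Keep rows with id > 1 and add an upper-cased copy of the name column.
df.filter(F.col("id") > 1) \
  .withColumn("name_upper", F.upper(F.col("name"))) \
  .show()

spark.stop()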
In [ ]: