{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f3a9d2c4-1b6e-4e0a-8c57-2d9b0e6a4f11",
   "metadata": {},
   "source": [
    "# PySpark environment smoke test\n",
    "\n",
    "Installs a pinned PySpark release, checks that a `java` executable can be launched from `PATH`, and runs a minimal DataFrame round trip.\n",
    "\n",
    "Run the cells top to bottom on a fresh kernel (Restart Kernel, then Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bf71b901-7db8-488a-89ac-4bcd0d608833",
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "# The version is pinned so that a re-run resolves the same release.\n",
    "%pip install -q pyspark==3.5.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0c9f3cf6-2850-4602-a462-09dfc682eb79",
   "metadata": {},
   "outputs": [],
   "source": [
    "# All imports used by the notebook live in this one cell.\n",
    "import subprocess\n",
    "\n",
    "import pyspark\n",
    "from pyspark.sql import SparkSession"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4b3b6a87-2ad8-49f0-a224-24367883c924",
   "metadata": {},
   "outputs": [],
   "source": [
    "def check_java_version():\n",
    "    \"\"\"Print the output of `java -version`, or an error message if it cannot be run.\n",
    "\n",
    "    stderr is folded into the captured output, so the version banner is\n",
    "    captured whichever stream `java` writes it to. Nothing is returned.\n",
    "    A missing or failing `java` executable is reported with a printed\n",
    "    message instead of an exception.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        java_version = subprocess.check_output([\"java\", \"-version\"], stderr=subprocess.STDOUT)\n",
    "    except OSError as e:\n",
    "        # Raised when `java` cannot be launched at all (e.g. FileNotFoundError\n",
    "        # because it is not on PATH); report it rather than showing a traceback.\n",
    "        print(\"Error checking Java version:\", e)\n",
    "    except subprocess.CalledProcessError as e:\n",
    "        # `java` started but exited with a non-zero status; show what it printed.\n",
    "        print(\"Error checking Java version:\", e.output.decode(\"utf-8\", errors=\"replace\"))\n",
    "    else:\n",
    "        print(java_version.decode(\"utf-8\", errors=\"replace\"))\n",
    "\n",
    "\n",
    "check_java_version()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c720b5ec-dbb1-4c74-9655-0d2d72c21c9e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize Spark Session\n",
    "spark = SparkSession.builder.appName(\"TestPySpark\").getOrCreate()\n",
    "\n",
    "try:\n",
    "    # Create a DataFrame\n",
    "    df = spark.createDataFrame([(1, \"Alice\"), (2, \"Bob\"), (3, \"Cathy\")], [\"id\", \"name\"])\n",
    "\n",
    "    # Show the DataFrame\n",
    "    df.show()\n",
    "finally:\n",
    "    # Stop the Spark Session even if a step above raised, so it is always released.\n",
    "    spark.stop()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}