Skip to content
Permalink
Browse files

Support pulling python dependency from maven (#438)

* remove backend submodule

* add exception message

* typo

* fix path

* revert

* fix

* fix
  • Loading branch information...
zhichao-li committed Jun 26, 2018
1 parent 4417455 commit 95a05cd6d804aa44d1b51e46cbead767d027bc59
Showing with 87 additions and 78 deletions.
  1. +1 −0 .gitignore
  2. +1 −38 make-dist.sh
  3. +1 −1 pyzoo/dev/prepare_env.sh
  4. +52 −0 scripts/get_bigdl_python
  5. +24 −35 zoo/pom.xml
  6. +5 −1 zoo/src/assembly/dist-all.xml
  7. +1 −1 zoo/src/assembly/dist.xml
  8. +2 −2 zoo/src/assembly/python-zip.xml
@@ -3,6 +3,7 @@
*.class
target/
/dist/
pyzoo/.pytest_cache/

# SBT, Maven specific
.cache
@@ -24,18 +24,6 @@ set -e

BASEDIR=$(dirname "$0")

# Check bigdl backend
if [ ! -d $BASEDIR/backend/bigdl ]; then
echo "backend/bigdl does not exist. Please try to execute: git submodule update --init --recursive"
exit 1
fi

# Check spark conf
if [ ! -f $BASEDIR/backend/bigdl/spark/dl/src/main/resources/spark-bigdl.conf ]; then
echo "Conf file does not exist. Please check: $BASEDIR/backend/bigdl/spark/dl/src/main/resources/spark-bigdl.conf"
exit 1
fi

# Check java
if type -p java>/dev/null; then
_java=java
@@ -67,36 +55,11 @@ if [ $MVN_INSTALL -eq 0 ]; then
exit 1
fi

args=`echo $*`
if [[ $args = *"build_backend"* ]]; then
echo "Full build!, Let's install bigdl first"
cd ${BASEDIR}
mv ${BASEDIR}/backend/bigdl/spark/dl/pom.xml ${BASEDIR}/backend/bigdl/spark/dl/pom.xml.origin
cat ${BASEDIR}/backend/bigdl/spark/dl/pom.xml.origin \
| sed 's/ <artifactId>bigdl<\/artifactId>/<artifactId>zoo_bigdl<\/artifactId>/' > ${BASEDIR}/backend/bigdl/spark/dl/pom.xml
command="mvn install -DskipTests $*"
cd backend/bigdl
echo "Executing: $command"
$command
cd ../../
fi
echo "Start to build analytics-zoo at `pwd`"

mvn clean package -DskipTests $*

DIST_DIR=$BASEDIR/dist

# Clean dist folder
rm -rf $DIST_DIR
mkdir -p $DIST_DIR/lib
mkdir -p $DIST_DIR/conf
mkdir -p $DIST_DIR/bin
mkdir -p $DIST_DIR/extra-resources
mkdir -p $DIST_DIR/apps
cp -r $BASEDIR/zoo/target/analytics-zoo-*-dist-all $DIST_DIR

cp -r $BASEDIR/zoo/target/*.jar $DIST_DIR/lib/
cp -r $BASEDIR/zoo/target/*.zip $DIST_DIR/lib/
cp $BASEDIR/backend/bigdl/spark/dl/src/main/resources/spark-bigdl.conf $DIST_DIR/conf/spark-analytics-zoo.conf
cp -r $BASEDIR/scripts/* $DIST_DIR/bin/
cp -r $BASEDIR/apps/* $DIST_DIR/apps/
cp $BASEDIR/zoo/target/extra-resources/zoo-version-info.properties $DIST_DIR/extra-resources/
@@ -30,7 +30,7 @@ if [ -z ${SPARK_HOME+x} ]; then echo "SPARK_HOME is unset"; exit 1; else echo "S

export PYSPARK_ZIP=`find $SPARK_HOME/python/lib -type f -iname '*.zip' | tr "\n" ":"`

export PYTHONPATH=$PYTHONPATH:$PYSPARK_ZIP:$DL_PYTHON_HOME:$ANALYTICS_ZOO_ROOT/backend/bigdl/pyspark:$ANALYTICS_ZOO_ROOT/backend/bigdl/spark/dl/src/main/resources/spark-bigdl.conf:$ANALYTICS_ZOO_ROOT/dist/conf/spark-analytics-zoo.conf:$ANALYTICS_ZOO_ROOT/zoo/target/extra-resources/zoo-version-info.properties
export PYTHONPATH=$PYSPARK_ZIP:$DL_PYTHON_HOME:$ANALYTICS_ZOO_ROOT/dist/bigdl_python:$ANALYTICS_ZOO_ROOT/zoo/target/bigdl_python/sources/:$ANALYTICS_ZOO_ROOT/zoo/target/bigdl_python/sources/spark-bigdl.conf:$ANALYTICS_ZOO_ROOT/dist/bigdl_python/spark-bigdl.conf:$ANALYTICS_ZOO_ROOT/dist/conf/spark-analytics-zoo.conf:$ANALYTICS_ZOO_ROOT/zoo/target/extra-resources/zoo-version-info.properties:$PYTHONPATH
echo "PYTHONPATH": $PYTHONPATH
export ANALYTICS_ZOO_CLASSPATH=$(find $ANALYTICS_ZOO_ROOT/zoo/target/ -name "*with-dependencies.jar" | head -n 1)
echo "ANALYTICS_ZOO_CLASSPATH": $ANALYTICS_ZOO_CLASSPATH
@@ -0,0 +1,52 @@
#!/usr/bin/env bash

# Copyright 2018 Analytics Zoo Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Download the BigDL python-api zip for a given BigDL version from Maven and
# unpack it under <dest_dir>/sources.
#
# Arguments:
#   $1 - destination directory; its previous contents are wiped
#   $2 - BigDL version to fetch (e.g. 0.5.0)
# Exit status:
#   0 on success; non-zero if the arguments are bad, the Maven download
#   fails (Maven's own status is propagated), or the archive cannot be unpacked.

if (( $# < 2 )); then
  echo "getting $* $#" >&2
  echo "Bad parameters. Usage example: bash get_bigdl_python.sh /tmp/bigdl_python/ 0.5.0" >&2
  exit 1
fi

DEST_DIR=$1
echo "DEST_DIR: $DEST_DIR"
BIGDL_VERSION=$2
echo "BIGDL_VERSION: $BIGDL_VERSION"

# Refuse empty values up front: an empty DEST_DIR would otherwise turn the
# cleanup below into `rm -rf /*`.
: "${DEST_DIR:?DEST_DIR must not be empty}"
: "${BIGDL_VERSION:?BIGDL_VERSION must not be empty}"

ARCHIVE_NAME="bigdl-python-api-${BIGDL_VERSION}.zip"

# Start from a clean destination directory.
rm -rf -- "${DEST_DIR:?}"/*
mkdir -p -- "${DEST_DIR}" || exit 1

# Keep the command in an array so that paths containing spaces or glob
# characters reach mvn as single, intact arguments.
GET_COMMAND=(
  mvn org.apache.maven.plugins:maven-dependency-plugin:2.4:get
  "-DartifactId=dist-spark-2.2.0-scala-2.11.8-all"
  "-DgroupId=com.intel.analytics.bigdl"
  "-Dversion=${BIGDL_VERSION}"
  "-Dclassifier=python-api"
  "-Dpackaging=zip"
  "-Ddest=${DEST_DIR}/${ARCHIVE_NAME}"
)

echo "Running: ${GET_COMMAND[*]}"
exit_status=0
"${GET_COMMAND[@]}" || exit_status=$?

if (( exit_status != 0 )); then
  # Remove the partially populated directory and report Maven's real status
  # so the calling build fails instead of silently continuing.
  rm -rf -- "${DEST_DIR:?}"
  echo "Fail to get the BigDL python api from maven!!!!" >&2
  exit "${exit_status}"
fi

# Unpack next to the downloaded archive; stop if the directory is unusable so
# nothing gets extracted into the caller's working directory by mistake.
cd -- "${DEST_DIR}" || exit 1
mkdir -p sources || exit 1
unzip "${ARCHIVE_NAME}" -d sources

@@ -18,14 +18,13 @@
<java.version>1.7</java.version>
<javac.version>1.7</javac.version>
<spark-scope>provided</spark-scope>
<bigdl-scope>provided</bigdl-scope>
<bigdl-scope>compile</bigdl-scope>
<scala.major.version>2.11</scala.major.version>
<scala.version>2.11.8</scala.version>
<scala.macros.version>2.1.0</scala.macros.version>
<scalatest.version>2.2.4</scalatest.version>
<spark.version>2.1.0</spark.version>
<bigdl.version>0.5.0</bigdl.version>
<backend.version>0.0.0</backend.version>
</properties>

<distributionManagement>
@@ -36,21 +35,6 @@
</distributionManagement>

<profiles>
<profile>
<id>build_backend</id>
<properties>
<bigdl-scope>provided</bigdl-scope>
</properties>
<dependencies>
<dependency>
<groupId>com.intel.analytics.bigdl</groupId>
<artifactId>zoo_bigdl</artifactId>
<version>${backend.version}</version>
<scope>compile</scope>
</dependency>
</dependencies>

</profile>
<profile>
<id>spark_1.6</id>
<properties>
@@ -61,16 +45,7 @@
<scala.macros.version>2.0.1</scala.macros.version>
<scalatest.version>2.2.4</scalatest.version>
<bigdl.artifactId>bigdl-SPARK_1.6</bigdl.artifactId>
<bigdl-scope>compile</bigdl-scope>
</properties>
<dependencies>
<dependency>
<groupId>com.intel.analytics.bigdl</groupId>
<artifactId>${bigdl.artifactId}</artifactId>
<version>${bigdl.version}</version>
<scope>${bigdl-scope}</scope>
</dependency>
</dependencies>
</profile>

<profile>
@@ -87,16 +62,7 @@
<scala.version>2.11.8</scala.version>
<scala.macros.version>2.1.0</scala.macros.version>
<bigdl.artifactId>bigdl-SPARK_${spark-version.project}</bigdl.artifactId>
<bigdl-scope>compile</bigdl-scope>
</properties>
<dependencies>
<dependency>
<groupId>com.intel.analytics.bigdl</groupId>
<artifactId>${bigdl.artifactId}</artifactId>
<version>${bigdl.version}</version>
<scope>${bigdl-scope}</scope>
</dependency>
</dependencies>
</profile>

<profile>
@@ -262,6 +228,12 @@
<version>${spark.version}</version>
<scope>${spark-scope}</scope>
</dependency>
<dependency>
<groupId>com.intel.analytics.bigdl</groupId>
<artifactId>${bigdl.artifactId}</artifactId>
<version>${bigdl.version}</version>
<scope>${bigdl-scope}</scope>
</dependency>
<dependency>
<groupId>org.spark-project.spark</groupId>
<artifactId>unused</artifactId>
@@ -325,6 +297,7 @@
<artifactId>exec-maven-plugin</artifactId>
<executions>
<execution>
<id>build_info</id>
<phase>generate-resources</phase>
<configuration>
<!-- Execute the shell script to generate the zoo build information. -->
@@ -343,6 +316,22 @@
<goal>exec</goal>
</goals>
</execution>
<execution>
<id>bigdl_python</id>
<phase>package</phase>
<configuration>
<arguments>
<argument>${project.basedir}/../scripts/get_bigdl_python</argument>
<argument>${project.build.directory}/bigdl_python</argument>
<argument>${bigdl.version}</argument>
</arguments>

<executable>bash</executable>
</configuration>
<goals>
<goal>exec</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
@@ -37,10 +37,14 @@
<include>zoo-version-info.properties</include>
</includes>
</fileSet>
<fileSet>
<outputDirectory>/bigdl_python</outputDirectory>
<directory>${project.build.directory}/bigdl_python/sources</directory>
</fileSet>
</fileSets>
<files>
<file>
<source>${project.parent.basedir}/backend/bigdl/spark/dl/src/main/resources/spark-bigdl.conf</source>
<source>${project.build.directory}/bigdl_python/sources/spark-bigdl.conf</source>
<outputDirectory>/conf</outputDirectory>
<destName>spark-analytics-zoo.conf</destName>
</file>
@@ -36,7 +36,7 @@
</fileSets>
<files>
<file>
<source>${project.parent.basedir}/backend/bigdl/spark/dl/src/main/resources/spark-bigdl.conf</source>
<source>${project.build.directory}/bigdl_python/sources/spark-bigdl.conf</source>
<outputDirectory>/conf</outputDirectory>
<destName>spark-analytics-zoo.conf</destName>
</file>
@@ -24,15 +24,15 @@
<exclude>docs/**/*</exclude>
</excludes>
<outputDirectory>/..</outputDirectory>
<directory>${project.parent.basedir}/backend/bigdl/pyspark</directory>
<directory>${project.build.directory}/bigdl_python/sources</directory>
</fileSet>

<fileSet>
<includes>
<include>**/*.conf</include>
</includes>
<outputDirectory>/..</outputDirectory>
<directory>${project.parent.basedir}/backend/bigdl/spark/dl/src/main/resources/</directory>
<directory>${project.build.directory}/bigdl_python/sources</directory>
</fileSet>

<fileSet>

0 comments on commit 95a05cd

Please sign in to comment.
You can’t perform that action at this time.