Sunday, July 21, 2013

CloudStack EMR API development series, Episode 1: How to add a basic launchHadoopCluster API to CloudStack.

The aim of my GSoC project is to add EMR-like APIs to CloudStack, so that users can take advantage of Whirr to provision Hadoop clusters on CloudStack. Instead of jumping straight into writing an EMR-compatible API, I created a simple API, launchHadoopCluster, just to get a sense of how APIs in CloudStack are developed and how to pass parameters to, and get responses from, the CloudStack web service.

The launchHadoopCluster API has the following structure:
Request parameters

Parameter Names Description Required
config The config file used by Whirr to define a cluster true
Response Tags
Response Name Description
output The output of running whirr on CloudStack

  1. Checkout the latest CloudStack source code.
    git clone https://git-wip-us.apache.org/repos/asf/cloudstack.git
  2. Create a directory for the plugin under the plugins folder (plugins/api/emr). Under this new folder, create a code hierarchy like the following tree.
    |-- src
    |   `-- org
    |       `-- apache
    |           `-- cloudstack
    |               |-- api
    |               |   `-- command
    |               |       `-- user
    |               |           `-- emr
    |               `-- emr
    |-- target
    `-- test
    
  3. Create a pom.xml for the emr module. The contents of the file are as follows:
    <!-- Maven build descriptor for the EMR API plugin module (plugins/api/emr). -->
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
             http://maven.apache.org/xsd/maven-4.0.0.xsd">
      <modelVersion>4.0.0</modelVersion>
      <artifactId>cloud-plugin-api-emr</artifactId>
      <name>Apache CloudStack Plugin - Elastic Map Reduce Plugin</name>
      <!-- Inherit group id, version and shared build settings from the plugins parent POM. -->
      <parent>
        <groupId>org.apache.cloudstack</groupId>
        <artifactId>cloudstack-plugins</artifactId>
        <version>4.2.0-SNAPSHOT</version>
        <relativePath>../../pom.xml</relativePath>
      </parent>
      <dependencies>
        <dependency>
          <groupId>org.apache.cloudstack</groupId>
          <artifactId>cloud-api</artifactId>
          <version>${project.version}</version>
        </dependency>
        <dependency>
          <groupId>org.apache.cloudstack</groupId>
          <artifactId>cloud-utils</artifactId>
          <version>${project.version}</version>
        </dependency>
      </dependencies>
      <build>
        <defaultGoal>install</defaultGoal>
        <!-- Sources live directly under src/ and test/ rather than the Maven default layout. -->
        <sourceDirectory>src</sourceDirectory>
        <testSourceDirectory>test</testSourceDirectory>
      </build>
    </project>
    
  4. Now I can open this project in NetBeans and begin to create the source files. Navigate to plugins/api/emr/src/org/apache/cloudstack/emr and create an interface, ElasticMapReduce.java, that extends PluggableService.
    package org.apache.cloudstack.emr;
    
    import com.cloud.utils.component.PluggableService;
    
    /**
     * Service interface for the EMR plugin. It extends {@link PluggableService}
     * and declares no members of its own.
     */
    public interface ElasticMapReduce extends PluggableService {
    }
    
  5. Create an implementation of the interface. Name it ElasticMapReduceImpl.java.
    package org.apache.cloudstack.emr;
    
    import java.util.ArrayList;
    import java.util.List;
    import javax.ejb.Local;
    import org.apache.cloudstack.api.command.user.emr.LaunchHadoopClusterCmd;
    import org.apache.log4j.Logger;
    import org.springframework.stereotype.Component;
    
    @Component
    @Local(value = ElasticMapReduce.class)
    
    public class ElasticMapReduceImpl implements ElasticMapReduce{
        private static final Logger s_logger = Logger.getLogger(ElasticMapReduceImpl.class);
        
        public ElasticMapReduceImpl(){
            super();
        }
        
        @Override
        public List> getCommands() {
        List> cmdList = new ArrayList>();
        cmdList.add(LaunchHadoopClusterCmd.class);
        return cmdList;
        } 
    }
    
  6. Navigate to plugins/api/emr/src/org/apache/cloudstack/api/command/user/emr and create the source files for the command and its response.
    LaunchHadoopClusterCmd.java
    
    package org.apache.cloudstack.api.command.user.emr;
    
    
    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.io.OutputStream;
    import java.nio.charset.Charset;
    import java.util.logging.Level;
    import java.util.logging.Logger;
    import org.apache.cloudstack.api.APICommand;
    import org.apache.cloudstack.api.BaseCmd;
    import org.apache.cloudstack.api.Parameter;
    
    /**
     * API command that runs {@code whirr launch-cluster --config <config>} on the
     * machine executing the command and returns what whirr printed.
     */
    @APICommand(name = "launchHadoopCluster", responseObject = LaunchHadoopClusterCmdResponse.class, description = "Launch a hadoop cluster using whirr on CloudStack", since ="4.2.0")
    public class LaunchHadoopClusterCmd extends BaseCmd{
        private static final Logger s_logger = Logger.getLogger(LaunchHadoopClusterCmd.class.getName());
        private static final String CMD_NAME = "launchHadoopCluster";

        @Parameter(name="config", type=CommandType.STRING, required=true, description="the configuation file to define a cluster")
        private String config;

        /**
         * Runs whirr with the supplied config and stores a response carrying its
         * output. If whirr cannot be started or read, the failure is logged and
         * the response's output is left {@code null}.
         */
        @Override
        public void execute()  {
            LaunchHadoopClusterCmdResponse response = new LaunchHadoopClusterCmdResponse();
            response.setObjectName("launchHadoopCluster");
            response.setResponseName(getCommandName());
            response.setOutPut(runWhirr());
            this.setResponseObject(response);
        }

        /**
         * Launches whirr and collects its combined stdout/stderr.
         *
         * @return the text printed by whirr (lines terminated by {@code '\n'}),
         *         or {@code null} if the process could not be started or read
         */
        private String runWhirr() {
            // Pass the arguments as a list so that a config value containing
            // whitespace stays a single argument and cannot add extra options.
            ProcessBuilder builder = new ProcessBuilder("whirr", "launch-cluster", "--config", config);
            // Merge stderr into stdout: one stream to drain, so the child cannot
            // block on a full stderr pipe while we read stdout.
            builder.redirectErrorStream(true);

            String result = null;
            Process process = null;
            try {
                process = builder.start();
                // getInputStream() is the child's stdout; getOutputStream() would be its stdin.
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(process.getInputStream(), Charset.defaultCharset()));
                StringBuilder captured = new StringBuilder();
                try {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        captured.append(line).append('\n');
                    }
                } finally {
                    reader.close();
                }
                result = captured.toString();
                process.waitFor();
            } catch (IOException ex) {
                s_logger.log(Level.SEVERE, "Failed to run whirr launch-cluster", ex);
            } catch (InterruptedException ex) {
                // Restore the interrupt flag for the caller; the output read so far is kept.
                Thread.currentThread().interrupt();
                s_logger.log(Level.SEVERE, "Interrupted while waiting for whirr to exit", ex);
            } finally {
                if (process != null) {
                    process.destroy();
                }
            }
            return result;
        }

        /** @return the API command name, {@code "launchHadoopCluster"} */
        @Override
        public String getCommandName() {
            return CMD_NAME;
        }

        /** @return always {@code 0} */
        @Override
        public long getEntityOwnerId() {
            return 0;
        }

    }
    
    LaunchHadoopClusterCmdResponse.java
    
    package org.apache.cloudstack.api.command.user.emr;
    
    import com.cloud.serializer.Param;
    import com.google.gson.annotations.SerializedName;
    import org.apache.cloudstack.api.ApiConstants;
    import org.apache.cloudstack.api.BaseResponse;
    
    /**
     * Response object for the launchHadoopCluster command. It carries the whirr
     * output, serialized under the name {@code "output"}, and an async flag.
     */
    public class LaunchHadoopClusterCmdResponse extends BaseResponse {
        @SerializedName(ApiConstants.IS_ASYNC) @Param(description = "true if api is asynchronous")
        private Boolean isAsync;
        @SerializedName("output") @Param(description = "whirr output")
        private String output;

        public LaunchHadoopClusterCmdResponse(){

        }

        /** @param isAsync the async flag to store; may be {@code null} */
        public void setAsync(Boolean isAsync) {
            this.isAsync = isAsync;
        }

        /**
         * @return {@code true} only if the flag was set to {@code true};
         *         {@code false} when it is unset or {@code false}
         */
        public boolean getAsync() {
            // The field is a nullable Boolean and no constructor sets it, so
            // unboxing it directly would throw NullPointerException.
            return Boolean.TRUE.equals(isAsync);
        }

        /** @param output the whirr output to return to the API caller */
        public void setOutPut(String output) {
            this.output = output;
        }
    }
    
    
  7. Add the following dependency to cloudstack/client/pom.xml.
    <dependency>
    <groupId>org.apache.cloudstack</groupId>
    <artifactId>cloud-plugin-api-emr</artifactId>
    <version>${project.version}</version>
    </dependency>
    
    Once the emr plugin is added to the client pom file, Maven will download and link the emr plugin for you during compilation and any other goals that require it.
  8. Update client/tomcatconf/componentContext.xml.in and add the following bean:
     <bean id="elasticMapReduceImpl" class="org.apache.cloudstack.emr.ElasticMapReduceImpl" />
    
  9. Update plugins/pom.xml to add the following module.
    <module>api/emr</module>
    
  10. Add the command to client/tomcatconf/commands.properties.in
    launchHadoopCluster=15
    
  11. Now let's compile the code and test it!
    1. Navigate to plugins/api/emr and run:
      mvn  clean install
      
    2. In cloudstack base directory run:
      mvn -pl client clean install
      
    3. Start the Management server UI.

No comments:

Post a Comment