Amazon EC2 Recipe for R

Tags:

I’ve written a preliminary set of scripts to run my R processes on Amazon EC2. I don’t know yet whether I will end up buying a new PC or becoming an enthusiastic EC2 user.

Run:
./ec2_ready.sh && ./ec2_work.sh

ec2.config

 
_your_server_.compute.amazonaws.com

common.sh

#!/bin/bash
# Shared connection settings; sourced by ec2_ready.sh and ec2_work.sh.

# Remote login: the "ubuntu" user at the host name stored in ec2.config.
SERVER="ubuntu@$(cat ec2.config)"
# Private key file handed to scp/ssh through -i.
PEM="_your_pem_file_.pem"
# Command prefixes; callers expand these unquoted so they split into words.
COPY="scp -i ${PEM}"
SSH="ssh -t -t -oStrictHostKeyChecking=no -i ${PEM}"
export SERVER PEM COPY SSH

ec2_ready.sh

#!/bin/bash
# Provision the EC2 instance: upload the code archive, install R and the
# required packages on the server, and schedule an automatic halt.
# Reads SERVER, COPY and SSH from common.sh.
source common.sh

# Bundle the local code directory for upload.
tar cvzf code.tgz _your_code_

# Upload to the remote login's home directory. The target must be SERVER:
# common.sh does not define DEST, so the old destination expanded to a bare ":".
${COPY} code.tgz "${SERVER}:"
# The quoted delimiter sends the block to the server verbatim, with no
# expansion by the local shell.
${SSH} "${SERVER}" << 'SSH_END'
# Prepare
sudo apt-get update
sudo apt-get -y install r-base-core

tar xvzf code.tgz

# Prepare R
cat > .Rprofile << END
options(repos="_your_favorite_repo_")
END

sudo R --no-save << END
install.packages("randomForest")
install.packages("DMwR")
install.packages("DAAG")
install.packages("doBy")
install.packages("e1071")
install.packages("gbm")
install.packages("party")
install.packages("plyr")
install.packages("stringr")
END

# Prepare shutdown, so that it terminates at least within 24hrs.
# Otherwise, it takes money!
echo "sudo halt" | at now + 1440 min

SSH_END

ec2_work.sh

#!/bin/bash
# Run the R job on the EC2 instance and fetch its results.
# Reads SERVER, COPY and SSH from common.sh.
source common.sh

# Run R remotely; everything R prints goes to log.txt on the server.
# The quoted delimiter sends the block to the server verbatim, with no
# expansion by the local shell.
${SSH} "${SERVER}" << 'SSH_END'
R --no-save << END > log.txt 2>&1
# randomForest() is not available until its package is attached.
library(randomForest)
data(iris)
rf <- randomForest(Species ~., data=iris)
summary(rf)
# Name the object explicitly: save() without objects stores nothing.
save(rf, file="work.RData")
END
SSH_END

# Fetch the log and the saved model into the current directory.
${COPY} "${SERVER}:log.txt" .
${COPY} "${SERVER}:work.RData" .