[cs615asa] [git commit] CS615 EBS-BACKUP; backup a directory into Elastic Block Storage (EBS) branch main updated. b82a956a62019c6959f57b23e15318ea437b36cb

Git Owner jschauma at stevens.edu
Mon Mar 29 19:48:40 EDT 2021


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "CS615 EBS-BACKUP; backup a directory into Elastic Block Storage (EBS)".

The branch, main has been updated
       via  b82a956a62019c6959f57b23e15318ea437b36cb (commit)
       via  c576349dc64c746a8d7f100316f3fe699220bf5d (commit)
       via  cf7ffbd7aaad3057463855642884c17820cea0bc (commit)
       via  59691d8c3e8ceb0551a8f77689c58899ed605176 (commit)
       via  b619988c169e5c0777707a59506fc58eccd0205c (commit)
      from  7015fef81360a1392692e3afe4990978e31b91f7 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit b82a956a62019c6959f57b23e15318ea437b36cb
Author: camatang <camatang at stevens.edu>
Date:   Mon Mar 22 22:34:02 2021 -0400

    updated requirements file

diff --git a/src/env_parsing.py b/src/env_parsing.py
index 9710296..1068fa4 100644
--- a/src/env_parsing.py
+++ b/src/env_parsing.py
@@ -1,4 +1,3 @@
-import os
 import environs
 
 def parse_env():
diff --git a/src/requirements.txt b/src/requirements.txt
index 3635457..9284c9c 100644
--- a/src/requirements.txt
+++ b/src/requirements.txt
@@ -1,8 +1,10 @@
-boto3==1.17.23
-botocore==1.20.23
-jmespath==0.10.0
-python-dateutil==2.8.1
-s3transfer==0.3.4
-six==1.15.0
-urllib3==1.26.3
-environs==9.3.1
\ No newline at end of file
+boto3==1.17.26
+botocore==1.20.26
+environs==9.3.1
+jmespath==0.10.0
+marshmallow==3.10.0
+python-dateutil==2.8.1
+python-dotenv==0.15.0
+s3transfer==0.3.4
+six==1.15.0
+urllib3==1.26.3

commit c576349dc64c746a8d7f100316f3fe699220bf5d
Merge: cf7ffbd 7015fef
Author: camatang <camatang at stevens.edu>
Date:   Mon Mar 22 22:30:35 2021 -0400

    Merge remote-tracking branch 'origin/main' into main

diff --cc .gitignore
index 62c8935,3e439ba..4ed4301
--- a/.gitignore
+++ b/.gitignore
@@@ -1,1 -1,3 +1,4 @@@
- .idea/
++.idea/
+ __pycache__/
+ **/__pycache__/
 -.vscode
++.vscode
diff --cc src/argument_parsing.py
index a81f9b6,df613cc..2195f7e
--- a/src/argument_parsing.py
+++ b/src/argument_parsing.py
@@@ -11,36 -11,32 +11,36 @@@ Options
    -r filter     Pass data through the given filter command on the remote
                  host before writing the data to the volume.
    -v volume-id  Use the given volume instead of creating a new one.
- """
+ '''
  import argparse
 -import sys
 +
  
  def parse_args(args):
    parser = argparse.ArgumentParser(
-       description="backup a directory into Elastic Block Storage (EBS)"
+       description='backup a directory into Elastic Block Storage (EBS)'
    )
    parser.add_argument(
-       "-l",
-       metavar="filter",
+       '-l',
+       metavar='filter',
        type=str,
-       help="Pass data through the given filter command on the local host before copying the\
-   data to the remote system.",
+       help='Pass data through the given filter command on the local host before copying the\
+   data to the remote system.',
    )
    parser.add_argument(
-       "-r",
-       metavar="filter",
+       '-r',
+       metavar='filter',
        type=str,
-       help="Pass data through the given filter command on the remote\
-                   host before writing the data to the volume.",
+       help='Pass data through the given filter command on the remote\
+                   host before writing the data to the volume.',
    )
    parser.add_argument(
-       "-v",
-       metavar="volume-id",
+       '-v',
+       metavar='volume-id',
        type=str,
-       help="Use the given volume instead of creating a new one.",
+       help='Use the given volume instead of creating a new one.',
    )
 -  return vars(parser.parse_args(args))
 +  parser.add_argument(
 +      "dir",
 +      help="Directory to backup",
 +  )
 +  return parser.parse_args(args)
diff --cc src/main.py
index 0e73e4c,463ecdc..aa5a0bb
--- a/src/main.py
+++ b/src/main.py
@@@ -1,111 -1,13 +1,111 @@@
 -import boto3
 -import sys
  import os
 +import subprocess
 +import sys
 +
 +import boto3
 +
 +from src.ec2 import EC2
 +from src.volume import Volume
  
- sys.path.append('..')
- from pkg import parse_args
+ from argument_parsing import parse_args
+ from env_parsing import parse_env
  
 -if __name__ == '__main__':
 -  args = parse_args(sys.argv[1:])
 -  env = parse_env()
 +#### TODO's ####
 +# [ ] determine whether to use subprocess or python logic for various operations
 +# [ ] create central logging utility around EBS_BACKUP_VERBOSE
 +# [ ] integrate local and remote filter logic
 +# [ ] potentially reuse the same instance or recreate each time?
 +
 +### Questions's ###
 +# [ ] can we have both local and remote filters?
 +
 +
 +ZONE = 'us-east-1'
 +
 +# Windows alternative to ~/.aws/credentials & ~/.aws/config (for local/testing purposes)
 +# achieves the same result of boto3.[resource|client]('service') via
 +# session.[resource|client]('service') but instead with explicit session configuration
 +session = boto3.Session(
 +    aws_access_key_id=os.getenv('AWS_ACCESS_KEY'),
 +    aws_secret_access_key=os.getenv('AWS_SECRET_KEY'),
 +    region_name=ZONE,
 +)
 +
 +
 +def upload(user, host, dir):
 +    # scp directory to be backed up to ec2 instance
 +    # flags = os.environ['EBS_BACKUP_VERBOSE']
 +    subprocess.call('scp {flags} {dir} {user}@{host}:/tmp'.format(
 +        flags='', user=user, host=host, dir=dir
 +    ).split(' '))
 +
 +# TODO: ensure we aren't overwriting something
 +def tar(user, host, dir):
 +    subprocess.call('tar czf - {dir} > {dir}.tgz'.format(
 +        user=user, host=host, dir=dir,
 +    ).split(' '))
  
 -  print(args, env)
 -  
 +    # # Result of this tar will get written to our the volume
 +    # flags = os.environ['EBS_BACKUP_VERBOSE']
 +    # if flags:
 +    #     subprocess.call('ssh {flags} {user}@{host} tar czf - tmp/{dir} > {dir}.tgz'.format(
 +    #         flags=flags, user=user, host=host, dir=dir,
 +    #     ).split(' '))
 +    # else:
 +    #     subprocess.call('ssh {user}@{host} tar czf - tmp/{dir} > {dir}.tgz'.split(' '))
 +
 +def backup(user, host, dir):
 +    subprocess.call('ssh {user}@{host} dd if=/tmp/{dir}.tgz of=/dev/sdf'.format(
 +        user=user, host=host, dir=dir
 +    ).split(' '))
 +
 +def calculate_dir_size(dir):
 +    # TODO: make sure this is x-platform compatible
 +    total = sum(d.stat().st_size for d in os.scandir(dir) if d.is_file())
 +    print(total)
 +    return total
 +
 +def backup(args):
 +    ec2 = None
 +
 +    try:
 +        size = calculate_dir_size(args.dir)
 +
 +        # 1. create ec2 ...
 +        ec2 = EC2(session, {
 +            'zone_id': ZONE + 'a',
 +        })
 +
 +        # 2. volume [creation] attachment ...
 +        vol = Volume(session, {
 +            'volume_id': args.v,
 +            'size': size * 2,
 +        })
 +        ec2.wait_for_instance()
 +        vol.attach_to_instance(ec2.instance_id)
 +
 +        # 3. tar ...
 +        tar('root', ec2.get_ip(), args.dir)
 +
 +        if args.l is not None:
 +            # 4. apply local-filter ...
 +            pass
 +
 +        # 5. scp/ssh ...
 +        upload('root', ec2.get_ip(), args.dir)
 +
 +        if args.r is not None:
 +            # 6. apply remote-filter ...
 +            pass
 +
 +        # 7. copy to volume ...
 +        backup('root', ec2.get_ip(), args.dir)
 +
 +        # 8. teardown ...
 +        ec2.cleanup()
 +    except:
 +        ec2.cleanup()
 +
 +if __name__ == '__main__':
 +    args = parse_args(sys.argv[1:])
 +    backup(args)

commit cf7ffbd7aaad3057463855642884c17820cea0bc
Author: camatang <camatang at stevens.edu>
Date:   Mon Mar 22 22:26:47 2021 -0400

    needed additional config to pass actual arg directory in

diff --git a/pkg/argument_parsing.py b/pkg/argument_parsing.py
index 9b6fb32..a81f9b6 100644
--- a/pkg/argument_parsing.py
+++ b/pkg/argument_parsing.py
@@ -13,7 +13,7 @@ Options:
   -v volume-id  Use the given volume instead of creating a new one.
 """
 import argparse
-import sys
+
 
 def parse_args(args):
   parser = argparse.ArgumentParser(
@@ -39,4 +39,8 @@ def parse_args(args):
       type=str,
       help="Use the given volume instead of creating a new one.",
   )
+  parser.add_argument(
+      "dir",
+      help="Directory to backup",
+  )
   return parser.parse_args(args)

commit 59691d8c3e8ceb0551a8f77689c58899ed605176
Author: camatang <camatang at stevens.edu>
Date:   Mon Mar 22 22:26:19 2021 -0400

    boto3 work under way

diff --git a/src/README b/src/README
index 6b42eb0..8b9254a 100644
--- a/src/README
+++ b/src/README
@@ -1,5 +1,15 @@
 all the code goes in here
 
+### Boto3
+
+**EC2**
+https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html
+
+**EC2 > Volumes**
+https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.Client.create_volume
+https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#volume
+
+
 ### Virtual Environments
 
 https://docs.python.org/3/library/venv.html
diff --git a/src/ec2.py b/src/ec2.py
new file mode 100644
index 0000000..77d4b49
--- /dev/null
+++ b/src/ec2.py
@@ -0,0 +1,83 @@
+import os
+
+from botocore.exceptions import ClientError
+
+
+class EC2(object):
+    session = None
+    ec2_client = None
+    instance = None
+    instance_id = None
+
+    def __init__(self, session, config=None):
+        if config is None:
+            config = {}
+        self.session = session
+        self.ec2_client = self.session.client('ec2')
+
+        # TODO: need to parse 'EBS_BACKUP_FLAGS_AWS' in
+        #  order to override defaults...
+        if 'EBS_BACKUP_VERBOSE' in os.environ:
+            overrides = os.environ['EBS_BACKUP_VERBOSE']
+
+        instance = self.ec2_client.run_instances(
+            ImageId='ami-0018b2d98332ba7e3',
+            MinCount=1,
+            MaxCount=1,
+            InstanceType='t2.micro',
+            Placement={"AvailabilityZone": config.get('zone_id', 'us-east-1a')}
+        )
+        # TODO: make sure we are getting the correct instance
+        #  (and not an existing instance on the account)
+        self.instance = instance["Instances"][0]
+        self.instance_id = self.instance["InstanceId"]
+
+    def get_a_zone(self):
+        try:
+            self.ec2_client.describe_availability_zones(DryRun=True)
+        except ClientError as e:
+            if 'DryRunOperation' not in str(e):
+                raise
+            try:
+                availability_zones = self.ec2_client.describe_availability_zones(DryRun=False)
+                print(availability_zones)
+                # son_zones = json.loads(availability_zones)
+                json_zones = availability_zones
+                for zone in json_zones["AvailabilityZones"]:
+                    if zone["State"] == "available" and zone["ZoneType"] == "availability-zone":
+                        return zone["ZoneName"]
+                raise Exception("Could not find an available zone")
+            except ClientError as e:
+                print(e)
+
+    def get_id(self):
+        return self.instance_id
+
+    def get_ip(self):
+        return self.instance.public_ip_address
+
+    def wait_for_instance(self):
+        waiter = self.ec2_client.get_waiter('instance_running')
+        waiter.wait(
+            InstanceIds=[self.instance_id],
+            DryRun=False
+        )
+
+    def cleanup(self):
+        try:
+            res1 = self.ec2_client.stop_instances(
+                InstanceIds=[self.instance_id],
+                DryRun=False
+            )
+            res2 = self.ec2_client.filter(
+                InstanceIds=[self.instance_id],
+            ).terminate()
+
+            if 'EBS_BACKUP_VERBOSE' in os.environ and os.environ['EBS_BACKUP_VERBOSE']:
+                print(res1)
+                print(res2)
+        except ClientError as e:
+            if 'EBS_BACKUP_VERBOSE' in os.environ and os.environ['EBS_BACKUP_VERBOSE']:
+                print(e)
+
+
diff --git a/src/main.py b/src/main.py
index 4f842c8..0e73e4c 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,10 +1,111 @@
-import boto3
-import sys
 import os
+import subprocess
+import sys
+
+import boto3
+
+from src.ec2 import EC2
+from src.volume import Volume
 
 sys.path.append('..')
 from pkg import parse_args
 
+#### TODO's ####
+# [ ] determine whether to use subprocess or python logic for various operations
+# [ ] create central logging utility around EBS_BACKUP_VERBOSE
+# [ ] integrate local and remote filter logic
+# [ ] potentially reuse the same instance or recreate each time?
+
+### Questions's ###
+# [ ] can we have both local and remote filters?
+
+
+ZONE = 'us-east-1'
+
+# Windows alternative to ~/.aws/credentials & ~/.aws/config (for local/testing purposes)
+# achieves the same result of boto3.[resource|client]('service') via
+# session.[resource|client]('service') but instead with explicit session configuration
+session = boto3.Session(
+    aws_access_key_id=os.getenv('AWS_ACCESS_KEY'),
+    aws_secret_access_key=os.getenv('AWS_SECRET_KEY'),
+    region_name=ZONE,
+)
+
+
+def upload(user, host, dir):
+    # scp directory to be backed up to ec2 instance
+    # flags = os.environ['EBS_BACKUP_VERBOSE']
+    subprocess.call('scp {flags} {dir} {user}@{host}:/tmp'.format(
+        flags='', user=user, host=host, dir=dir
+    ).split(' '))
+
+# TODO: ensure we aren't overwriting something
+def tar(user, host, dir):
+    subprocess.call('tar czf - {dir} > {dir}.tgz'.format(
+        user=user, host=host, dir=dir,
+    ).split(' '))
+
+    # # Result of this tar will get written to our the volume
+    # flags = os.environ['EBS_BACKUP_VERBOSE']
+    # if flags:
+    #     subprocess.call('ssh {flags} {user}@{host} tar czf - tmp/{dir} > {dir}.tgz'.format(
+    #         flags=flags, user=user, host=host, dir=dir,
+    #     ).split(' '))
+    # else:
+    #     subprocess.call('ssh {user}@{host} tar czf - tmp/{dir} > {dir}.tgz'.split(' '))
+
+def backup(user, host, dir):
+    subprocess.call('ssh {user}@{host} dd if=/tmp/{dir}.tgz of=/dev/sdf'.format(
+        user=user, host=host, dir=dir
+    ).split(' '))
+
+def calculate_dir_size(dir):
+    # TODO: make sure this is x-platform compatible
+    total = sum(d.stat().st_size for d in os.scandir(dir) if d.is_file())
+    print(total)
+    return total
+
+def backup(args):
+    ec2 = None
+
+    try:
+        size = calculate_dir_size(args.dir)
+
+        # 1. create ec2 ...
+        ec2 = EC2(session, {
+            'zone_id': ZONE + 'a',
+        })
+
+        # 2. volume [creation] attachment ...
+        vol = Volume(session, {
+            'volume_id': args.v,
+            'size': size * 2,
+        })
+        ec2.wait_for_instance()
+        vol.attach_to_instance(ec2.instance_id)
+
+        # 3. tar ...
+        tar('root', ec2.get_ip(), args.dir)
+
+        if args.l is not None:
+            # 4. apply local-filter ...
+            pass
+
+        # 5. scp/ssh ...
+        upload('root', ec2.get_ip(), args.dir)
+
+        if args.r is not None:
+            # 6. apply remote-filter ...
+            pass
+
+        # 7. copy to volume ...
+        backup('root', ec2.get_ip(), args.dir)
+
+        # 8. teardown ...
+        ec2.cleanup()
+    except:
+        ec2.cleanup()
+
 if __name__ == '__main__':
-  args = parse_args(sys.argv[1:])
-  print(args)
\ No newline at end of file
+    args = parse_args(sys.argv[1:])
+    backup(args)
diff --git a/src/tmp/tmp.txt b/src/tmp/tmp.txt
new file mode 100644
index 0000000..6feaed4
--- /dev/null
+++ b/src/tmp/tmp.txt
@@ -0,0 +1 @@
+Test file for backing up.
\ No newline at end of file
diff --git a/src/volume.py b/src/volume.py
new file mode 100644
index 0000000..694761b
--- /dev/null
+++ b/src/volume.py
@@ -0,0 +1,68 @@
+# WIP: "Backup Volume" class for working with ec2 volume API
+from botocore.exceptions import ClientError
+
+
+class Volume(object):
+
+    # From the session instance we can access all services via the either `client` or `resource`
+    session = None
+    ec2_client = None
+    ec2_volume = None
+    volume_id = None
+
+    def __init__(self, session, config=None):
+        if config is None:
+            config = {}
+        self.session = session
+        self.ec2_client = session.client('ec2')
+
+        # for new volumes
+        if not ('volume_id' in config) or config['volume_id'] is None:
+            res = self.ec2_client.create_volume(
+                AvailabilityZone='us-east-1a',
+                # TODO: this needs to be 2x the dir that is being backed up
+                Size=config['size'],
+            )
+            self.volume_id = res['VolumeId']
+        # for existing volumes
+        self.ec2_volume = session.resource('ec2').Volume(self.volume_id)
+        if self.ec2_volume is None:
+            raise Exception("The provided volume ID does not exist")
+
+    # Instance doesn't need to be running, according to boto3 documentation for volume.attach_volume
+    # Encrypted EBS volumes must be attached to instances that support Amazon EBS encryption
+    # After volume is attached, it must be made available
+    # TODO: Must find out how to determine suitable raw disk device, may differ depending on the instance type
+    def attach_to_instance(self, instance_id):
+        # chosen from: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/device_naming.html
+        device_name = "/dev/sdf"  # Just for example. Should determine based on instance type
+        try:
+            res = self.ec2_volume.attach_to_instance(
+                Device=device_name,
+                InstanceId=instance_id,
+                DryRun=True
+            )
+            # Note: potential metadata we might be interested in
+            res['ResponseMetadata']
+        except ClientError as e:
+            if 'DryRunOperation' not in str(e):
+                raise
+        try:
+            res = self.ec2_volume.attach_to_instance(
+                Device=device_name,
+                InstanceId=instance_id,
+                DryRun=False
+            )
+            # Note: potential metadata we might be interested in
+            res['ResponseMetadata']
+        except ClientError as e:
+            print(e)
+
+    def delete(self):
+        # TODO: ensure `volume_id` is set
+        res = self.ec2_client.delete_volume(
+            VolumeId=self.volume_id
+        )
+        # Note: potential metadata we might be interested in
+        res['ResponseMetadata']
+

commit b619988c169e5c0777707a59506fc58eccd0205c
Author: camatang <camatang at stevens.edu>
Date:   Thu Mar 18 21:12:41 2021 -0400

    added root gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..62c8935
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.idea/
\ No newline at end of file

-----------------------------------------------------------------------

Summary of changes:
 .gitignore              |   3 +-
 src/README              |  10 +++++
 src/argument_parsing.py |   8 +++-
 src/ec2.py              |  83 +++++++++++++++++++++++++++++++++++
 src/env_parsing.py      |   1 -
 src/main.py             | 112 +++++++++++++++++++++++++++++++++++++++++++++---
 src/requirements.txt    |  18 ++++----
 src/tmp/tmp.txt         |   1 +
 src/volume.py           |  68 +++++++++++++++++++++++++++++
 9 files changed, 285 insertions(+), 19 deletions(-)
 create mode 100644 src/ec2.py
 create mode 100644 src/tmp/tmp.txt
 create mode 100644 src/volume.py


hooks/post-receive
-- 
CS615 EBS-BACKUP; backup a directory into Elastic Block Storage (EBS)


More information about the cs615asa mailing list