[cs615asa] [git commit] CS615 EBS-BACKUP; backup a directory into Elastic Block Storage (EBS) branch main updated. 3fe1a2f3516ea0765bdd6f968422a09d0ef2965e

Git Owner jschauma at stevens.edu
Mon May 3 20:21:09 EDT 2021


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "CS615 EBS-BACKUP; backup a directory into Elastic Block Storage (EBS)".

The branch, main has been updated
       via  3fe1a2f3516ea0765bdd6f968422a09d0ef2965e (commit)
       via  2ffa80c9ef0bfb063bdc5aefb0eff4cea3e7138d (commit)
      from  34fa2f390dd59e9525fe9fd1ba38f6b13678fd2a (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
commit 3fe1a2f3516ea0765bdd6f968422a09d0ef2965e
Author: Charles Magyar IV <gartril854 at gmail.com>
Date:   Sun May 2 19:39:30 2021 -0400

    EBS_BACKUP_VERBOSE works if set to 1.  EBS_BACKUP_FLAGS_SSH incorporated into the piped backup function.  The piped backup function now correctly uses a remote filter.  I removed a lot of the code Chris initially wrote that didn't pipe.  (Sorry Chris, just making it clean since we weren't using it.)  I added a bunch of TODOs in main.py

diff --git a/src/ec2.py b/src/ec2.py
index 011527e..b68d05d 100644
--- a/src/ec2.py
+++ b/src/ec2.py
@@ -77,7 +77,7 @@ class EC2(object):
             elif value[n] == '{':
                 group = re.search('\{(.*?)\}', value[n:]).group(1)
                 res = EC2.parse_values(group, {})['pl']
-                if isinstance(pl, list):
+                if isinstance(pl, list): 
                     pl.append(res)
                 else:
                     pl = res
@@ -171,7 +171,8 @@ class EC2(object):
                 raise
             try:
                 availability_zones = self.ec2_client.describe_availability_zones(DryRun=False)
-                print(availability_zones)
+                if 'EBS_BACKUP_VERBOSE' in os.environ and os.environ['EBS_BACKUP_VERBOSE']:
+                    print(availability_zones)
                 # son_zones = json.loads(availability_zones)
                 json_zones = availability_zones
                 for zone in json_zones["AvailabilityZones"]:
diff --git a/src/main.py b/src/main.py
index fb799ac..6509d54 100644
--- a/src/main.py
+++ b/src/main.py
@@ -14,20 +14,27 @@ from volume import Volume
 
 
 #### TODO's ####
+# [] Test on various OS
+# [] Corner cases for Environment flags
+# [] Document how to set up, how to use
+# [] If canceled early, don't try to detach an unattached volume during cleanup
 # [sub] determine whether to use subprocess or python logic for various operations
-# [ ] create central logging utility around EBS_BACKUP_VERBOSE
-# [soon] integrate local and remote filter logic
-# [plz no] potentially reuse the same instance or recreate each time?
+# [ done?] create central logging utility around EBS_BACKUP_VERBOSE
+#       # [] Confirm Messages make sense and are helpful.
+# [done] integrate local and remote filter logic
+# [no] potentially reuse the same instance or recreate each time?
 
 
 ### Questions's ###
+# [] Does EBS_BACKUP_VERBOSE just 0 or 1, or can it be True/False?
 # [yes] can we have both local and remote filters?
 
-ZONE = 'us-east-1'
-KEY_FLAGS = os.getenv('EBS_BACKUP_FLAGS_SSH') or ''
 DEVICE_NAME = 'xbd1' #NetBSD labels devices sequentially.
-MAX_WAIT_TRIES = 12
+KEY_FLAGS = os.getenv('EBS_BACKUP_FLAGS_SSH') or ''
+MAX_WAIT_TRIES = 18
 TRY_WAIT_TIME  = 10  #Seconds
+VERBOSE_MODE = os.getenv('EBS_BACKUP_VERBOSE') or ''
+ZONE = 'us-east-1'
 
 class VolumeStatus(Enum):
     DETACHED	= 'detached'
@@ -45,6 +52,12 @@ session = boto3.Session(
 
 client = boto3.client('ec2')
 
+def calculate_dir_size(dir):
+    # TODO: make sure this is x-platform compatible
+    total = sum(d.stat().st_size for d in os.scandir(dir) if d.is_file())
+    return total
+
+
 def pipe_backup(args, target_ip):
     local_filter = args.l
     remote_filter = args.r
@@ -53,25 +66,23 @@ def pipe_backup(args, target_ip):
     user = 'root'
     host = target_ip
     url = user+'@'+host
-    # key = KEY_FLAGS
     local_filter = args.l
     tar_process = Popen(['tar', 'cf', '-', target_dir], stdout=PIPE)
+    ssh_proc_array = ['ssh', '-v', '-o', 'StrictHostKeyChecking=no', KEY_FLAGS, url, remote_filter, '|', 'dd', 'of=/dev/'+DEVICE_NAME+''];
+
+    if( VERBOSE_MODE != 1 ):
+        ssh_proc_array.remove('-v')
+    if( KEY_FLAGS is None or KEY_FLAGS == '' ):
+        ssh_proc_array.remove(KEY_FLAGS)
+    if( remote_filter is None ):
+        ssh_proc_array.remove(remote_filter)
+        ssh_proc_array.remove('|')
+
     if( local_filter is not None ):
-        local_filter_process = Popen(local_filter.split(' '), stdin=tar_process.stdout, stdout=PIPE)
-        if( remote_filter is not None ):
-            ssh_process = Popen(['ssh', '-v', '-o', 'StrictHostKeyChecking=no', url, '"dd', 'of=/dev/'+DEVICE_NAME+'"'], 
-                stdin=local_filter_process.stdout, stdout=PIPE)
-            remote_filter_process = remote_filter.split(' '), stdin=ssh_process.stdout)
-        else:
-            ssh_process = Popen(['ssh', '-v', '-o', 'StrictHostKeyChecking=no', url, '"dd', 'of=/dev/'+DEVICE_NAME+'"'], 
-                stdin=local_filter_process.stdout)
+        local_filter_process = Popen(local_filter.split(' '), stdin=tar_process.stdout, stdout=PIPE) 
+        ssh_process = Popen(ssh_proc_array, stdin=local_filter_process.stdout)
     else:
-        if( remote_filter is not None ):
-            ssh_process = Popen(['ssh', '-v', '-o', 'StrictHostKeyChecking=no',  url, 'dd', 'of=/dev/'+DEVICE_NAME+''], 
-                stdin=tar_process.stdout, stdout=PIPE)
-            remote_filter_process = remote_filter.split(' '), stdin=ssh_process.stdout)
-        else:
-            ssh_process = Popen(['ssh', '-v', '-o', 'StrictHostKeyChecking=no',  url, 'dd', 'of=/dev/'+DEVICE_NAME+''], stdin=tar_process.stdout) 
+        ssh_process = Popen(ssh_proc_array, stdin=tar_process.stdout)
 
     out, err = ssh_process.communicate()
 
@@ -81,25 +92,28 @@ def wait_for_instance_ok(instance_id, tries):
     status_dictionary = client.describe_instance_status(InstanceIds=[instance_id])
     instance_status_info = status_dictionary.get('InstanceStatuses')
     status = instance_status_info[0].get('InstanceStatus').get('Status')
-    while(status != 'ok' and i < tries):
-        print(('Instance {id} Status: ' + status).format(id=instance_id))
+    while (status != 'ok' and i < tries):
+        if (VERBOSE_MODE):
+            print(('Instance {id} Status:{status}  ({i}/{tries})').format(id=instance_id, status=status, i=i, tries=tries))
         time.sleep(TRY_WAIT_TIME)
         status_dictionary = client.describe_instance_status(InstanceIds=[instance_id])
         instance_status_info = status_dictionary.get('InstanceStatuses')
         status = instance_status_info[0].get('InstanceStatus').get('Status')
         i = i + 1
-    if(i == tries):
-        print('Max tries reached.  Moving on...')
-    else:
-        print('Instace {id} Status: ok'.format(id=instance_id))
+    if (VERBOSE_MODE):
+        if (i == tries):
+            print('Max tries reached.  Moving on...')
+        else:
+            print('Instance {id} Status: ok'.format(id=instance_id))
 
 def wait_for_volume_status(volume_id, target_status, tries):
     i = 1
     status_dictionary = client.describe_volumes(VolumeIds=[volume_id])
     volume_status = status_dictionary.get('Volumes')[0].get('Attachments')[0].get('State')
-    while(volume_status != target_status and i < tries): 
-        print('Volume status: ' + volume_status)
-        print('Target status: ' + target_status)
+    while (volume_status != target_status and i < tries): 
+        if (VERBOSE_MODE):
+            print('Volume status: ' + volume_status)
+            print('Target status: ' + target_status + '({i}/{tries})'.format(i=i, tries=tries))
         time.sleep(TRY_WAIT_TIME)
         status_dictionary = client.describe_volumes(VolumeIds=[volume_id])
         if(status_dictionary is not None):
@@ -111,44 +125,11 @@ def wait_for_volume_status(volume_id, target_status, tries):
                 else:
                     break;
         i = i + 1
-    if(i == tries):
-        print('Max tries completed. Moving on...')
-    else:
-        print('Volume status: ' + volume_status)
-
-def upload(user, host, _dir):
-    # scp directory to be backed up to ec2 instance
-    # flags = os.environ['EBS_BACKUP_VERBOSE']
-    subprocess.Popen('scp -v -o StrictHostKeyChecking=no {key} {dir}.tar {user}@{host}:/tmp/'.format(
-        user=user, host=host, dir=_dir, key=KEY_FLAGS
-    ).split(' '))
-
-    # ssh = SSHClient()
-    # ssh.load_system_host_keys()
-    # ssh.connect(hostname=host, username=user, key_filename=KEY_NAME)
-    # scp = SCPClient(ssh.get_transport())
-    # scp.put('{dir}.tar'.format(dir=_dir), remote_path='~')
-    # scp.close()
-
-
-# ebs - backup does not use any temporary files, nor creates a
-# local copy of the archive it writes to the volume.
-def tar(dir):
-    subprocess.call('tar -cvf {dir}.tar {dir}'.format(dir=dir).split(' '))
-
-
-def backup_data(user, host, dir):
-    # TODO: make sure key exists
-    subprocess.call('ssh -v {key} {user}@{host} dd if=/tmp/{dir}.tar of=/dev/xbd1'.format(
-        user=user, host=host, dir=dir, key=KEY_FLAGS
-    ).split(' '))
-
-
-def calculate_dir_size(dir):
-    # TODO: make sure this is x-platform compatible
-    total = sum(d.stat().st_size for d in os.scandir(dir) if d.is_file())
-    return total
-
+    if (VERBOSE_MODE):
+        if (i == tries):
+            print('Max tries completed. Moving on...')
+        else:
+            print('Volume status: ' + volume_status)
 
 ### Always cleanup ###
 def exit(ec2):
@@ -156,7 +137,6 @@ def exit(ec2):
         ec2.cleanup()
     return handle
 
-
 def backup(args):
     ec2 = None
     try:
@@ -169,7 +149,7 @@ def backup(args):
             })
             # If the EC2 instance gets created let's be sure to cleanup
             # regardless of manner of exit
- #           atexit.register(exit(ec2))
+            atexit.register(exit(ec2))
         else:
             ec2 = EC2(session, {
                 'instance_id': args.i,
@@ -181,8 +161,8 @@ def backup(args):
             'volume_id': args.v,
             'size': size,
         })
-	# IF VERBOSE...
-        print('Instance spinning up...')
+        if (VERBOSE_MODE):
+            print('Instance spinning up...')
         ec2.wait_for_instance()
         vol.attach_to_instance(ec2.instance_id)
         wait_for_volume_status(vol.id(), VolumeStatus.ATTACHED.value, MAX_WAIT_TRIES)
@@ -190,33 +170,12 @@ def backup(args):
         # Allow for instance to be ok to ssh to
         wait_for_instance_ok(ec2.instance_id, MAX_WAIT_TRIES)
 
-        # 3. tar ...
-      #  tar(args.dir)
-
-        #if args.l is not None:
-            # 4. apply local-filter ...
-         #   pipe_everything(args, ec2.get_ip())
-          #  pass
-
         pipe_backup(args, ec2.get_dns())
-        # 5. scp/ssh ...
-        #upload('root', ec2.get_ip(), args.dir)
-
-#        if args.r is not None:
-            # 6. apply remote-filter ...
-#            pass
-
-        # 7. copy to volume ...
-        #backup_data('root', ec2.get_ip(), args.dir)
 
         vol.cleanup(ec2.get_id())
         wait_for_volume_status(vol.id(), VolumeStatus.DETACHED.value, MAX_WAIT_TRIES)
-        # 8. teardown ...
-        ec2.cleanup()
-
-        # remove local tar after upload
-        #subprocess.call('rm {dir}.tar')
 
+        ec2.cleanup()
         # If successful, ebs - backup will print the volume - id of the volume
         # to which it backed up the data as the only output.
         print(vol.id())
diff --git a/src/volume.py b/src/volume.py
index c0826b9..b7e1801 100644
--- a/src/volume.py
+++ b/src/volume.py
@@ -13,7 +13,10 @@ class Volume(object):
     # chosen from: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/device_naming.html
     # note: the name of the device on the machine doesn't necessarily match the volume
 
+    # 
     # TODO - should be global variable (used in main.py too)
+    # -> Not a global variable.  Device_name in main is the device in NetBSD after it's attached.
+    # -> This is the device name for the AWS function parameter.
     device_name = "/dev/sdf"  # Just for example. Should determine based on instance type
 
     def __init__(self, session, config=None):
@@ -46,7 +49,6 @@ class Volume(object):
     # Instance doesn't need to be running, according to boto3 documentation for volume.attach_volume
     # Encrypted EBS volumes must be attached to instances that support Amazon EBS encryption
     # After volume is attached, it must be made available
-    # TODO: Must find out how to determine suitable raw disk device, may differ depending on the instance type
     def attach_to_instance(self, instance_id):
         res = self.ec2_volume.attach_to_instance(
             Device=self.device_name,

commit 2ffa80c9ef0bfb063bdc5aefb0eff4cea3e7138d
Author: Charles Magyar IV <gartril854 at gmail.com>
Date:   Sun May 2 13:59:54 2021 -0400

    Remote filtering added.

diff --git a/src/main.py b/src/main.py
index 9426ddc..fb799ac 100644
--- a/src/main.py
+++ b/src/main.py
@@ -58,11 +58,20 @@ def pipe_backup(args, target_ip):
     tar_process = Popen(['tar', 'cf', '-', target_dir], stdout=PIPE)
     if( local_filter is not None ):
         local_filter_process = Popen(local_filter.split(' '), stdin=tar_process.stdout, stdout=PIPE)
-        ssh_process = Popen(['ssh', '-v', '-o', 'StrictHostKeyChecking=no', url, '"dd', 'of=/dev/'+DEVICE_NAME+'"'], stdin=local_filter_process.stdout)
+        if( remote_filter is not None ):
+            ssh_process = Popen(['ssh', '-v', '-o', 'StrictHostKeyChecking=no', url, '"dd', 'of=/dev/'+DEVICE_NAME+'"'], 
+                stdin=local_filter_process.stdout, stdout=PIPE)
+            remote_filter_process = remote_filter.split(' '), stdin=ssh_process.stdout)
+        else:
+            ssh_process = Popen(['ssh', '-v', '-o', 'StrictHostKeyChecking=no', url, '"dd', 'of=/dev/'+DEVICE_NAME+'"'], 
+                stdin=local_filter_process.stdout)
     else:
-        ssh_process = Popen(['ssh', '-v', '-o', 'StrictHostKeyChecking=no',  url, 'dd', 'of=/dev/'+DEVICE_NAME+''], stdin=tar_process.stdout)
-
-    #TODO - Remote filter.  
+        if( remote_filter is not None ):
+            ssh_process = Popen(['ssh', '-v', '-o', 'StrictHostKeyChecking=no',  url, 'dd', 'of=/dev/'+DEVICE_NAME+''], 
+                stdin=tar_process.stdout, stdout=PIPE)
+            remote_filter_process = remote_filter.split(' '), stdin=ssh_process.stdout)
+        else:
+            ssh_process = Popen(['ssh', '-v', '-o', 'StrictHostKeyChecking=no',  url, 'dd', 'of=/dev/'+DEVICE_NAME+''], stdin=tar_process.stdout) 
 
     out, err = ssh_process.communicate()
 

-----------------------------------------------------------------------

Summary of changes:
 src/ec2.py    |   5 ++-
 src/main.py   | 138 ++++++++++++++++++++++------------------------------------
 src/volume.py |   4 +-
 3 files changed, 59 insertions(+), 88 deletions(-)


hooks/post-receive
-- 
CS615 EBS-BACKUP; backup a directory into Elastic Block Storage (EBS)


More information about the cs615asa mailing list