-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsplitfiles.sh
executable file
·259 lines (227 loc) · 8.02 KB
/
splitfiles.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
#!/bin/bash
########################################################################
#
# splitfiles.sh - split and join files and directories
#
# written by Jason Baker ([email protected])
# on github: https://github.com/codercowboy/scripts
# more info: http://www.codercowboy.com
#
########################################################################
#
# UPDATES:
#
# 2021/6/14
# - Initial version
#
########################################################################
#
# Copyright (c) 2021, Coder Cowboy, LLC. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are those
# of the authors and should not be interpreted as representing official policies,
# either expressed or implied.
#
########################################################################
# Print the full usage/help text followed by an error line, then exit.
#arg 1 = error message shown on the final "ERROR:" line
# Outputs: everything is written to stdout
# Exits:   always ends the script (or the calling subshell) with status 1,
#          so callers never get control back after invoking this
function print_usage() {
  echo "USAGE: splitfiles.sh [operation] [arguments] [file or directory]"
  echo ""
  echo "OPERATIONS:"
  echo " split [part size] - split specified file or directory into specified size"
  echo " join - join specified file or all split files in specified directory"
  echo ""
  echo "NOTE ON SPLIT PART SIZE ARGUMENT:"
  echo ""
  echo " Part size for 'split' operation can be a raw number such as '1234581',"
  echo " or 'k' / 'm' can be specified such as '500m' for 500 megabyte part size"
  echo ""
  echo "EXAMPLE USAGE:"
  echo ""
  # the examples must use the script's real name (splitfiles.sh) so they can be copy-pasted
  echo " # example: split file.txt into 500 megabyte chunks"
  echo " splitfiles.sh split 500m file.txt"
  echo ""
  echo " # example: split each file in my_dir directory into 500 megabyte chunks"
  echo " splitfiles.sh split 500m my_dir"
  echo ""
  echo " # example: join file.txt"
  echo " splitfiles.sh join file.txt"
  echo ""
  echo " # example: join previously-split files in my_dir directory"
  echo " splitfiles.sh join my_dir"
  echo ""
  echo "ERROR: ${1}"
  exit 1
}
# Split one file into fixed-size parts named "<file>.part.aa", "<file>.part.ab", ...
# The original file is left in place.
#arg 1 = size, ie 2m for two megabytes (handed to `split -b` unchanged)
#arg 2 = file
# Returns: 0 when the parts were created, or when the file was already split
#          1 when the underlying split/cp command failed
function split_file() {
  local part_size="${1}"
  local file="${2}"
  local prefix="${file}.part."
  local -a parts=()

  # an existing first part means a previous run already handled this file
  if [ -e "${prefix}aa" ]; then
    echo "Not splitting file, it has already been split: ${file}"
    return 0
  fi

  echo -n "Splitting file: ${file} into ${part_size} parts ... "
  if [ -s "${file}" ]; then
    # file is not zero-sized, do the split:
    # -b is byte count, first arg is file to split, second arg is prefix of split files
    if ! split -b "${part_size}" -- "${file}" "${prefix}"; then
      echo "FAILED"
      echo "ERROR: could not split file: ${file}" >&2
      return 1
    fi
  else
    # split creates no output for a zero-sized file, so copy it to ".part.aa" for consistency
    if ! cp -- "${file}" "${prefix}aa"; then
      echo "FAILED"
      echo "ERROR: could not create part for zero-sized file: ${file}" >&2
      return 1
    fi
  fi

  # count the parts by globbing on the prefix
  # asterisk reference: https://unix.stackexchange.com/questions/378205/use-asterisk-in-variables
  parts=( "${prefix}"* )
  echo "(${#parts[@]} parts created)"
}
# Concatenate every "<original>.part.*" file, in glob order, into one output file.
#arg 1 = original file name (the part files are "<original>.part.aa", ".part.ab", ...)
#arg 2 = output file name
# Returns: 0 when the file was joined, and also when nothing was done because
#            the parts are missing or the output file already exists
#          1 when writing the output failed (the partial output is removed)
function join_file() {
  local original="${1}"
  local output="${2}"
  local -a parts=()

  if [ ! -e "${original}.part.aa" ]; then
    echo "Could not find parts to join for file: ${original}"
    return 0
  elif [ -e "${output}" ]; then
    # never overwrite an existing file
    echo "Not joining file, output file already exists: ${output}"
    return 0
  fi

  parts=( "${original}.part."* )
  echo -n "Joining file: ${original} (${#parts[@]} parts) ... "
  if ! cat -- "${parts[@]}" > "${output}"; then
    # the output did not exist before this call, so anything there now is our own
    # incomplete result; leaving it would make every retry stop at the "already exists" check
    rm -f -- "${output}"
    echo " FAILED"
    echo "ERROR: could not join parts into: ${output}" >&2
    return 1
  fi
  echo " Finished"
}
# Join every split file found directly inside a directory.
# Each "<name>.part.aa" in the directory identifies one original file; join_file
# is called once per original.
# arg 1 = directory to search
# arg 2 = suffix to add to original filename (ie "joined" ); empty means no suffix
# Returns: 0 when every join_file call succeeded, 1 when at least one reported failure
function join_files() {
  local dir="${1}"
  local suffix=""
  local first_part original
  local status=0

  if [ -n "${2}" ]; then
    suffix=".${2}"
  fi

  echo "Joining Files From Directory: ${dir}"
  for first_part in "${dir}/"*.part.aa; do
    # when nothing matches, the pattern is left as a literal string; skip it
    [ -e "${first_part}" ] || continue
    # strip exactly the trailing ".part.aa" (a sed regex with bare dots could also
    # match elsewhere in the path, e.g. inside a directory name)
    original="${first_part%.part.aa}"
    join_file "${original}" "${original}${suffix}" || status=1
  done
  echo "Finished Joining Files."
  return "${status}"
}
# Print the MD5 digest of a file, without the file-name column md5sum appends.
# arg 1 = file
function get_checksum() {
  local target="${1}"
  # keep only the text in front of the first space on each md5sum output line
  md5sum "${target}" | cut -d ' ' -f 1
}
# Self-test: generate a 1MB and a 100MB random file in a scratch directory, split
# them, join them again, and compare MD5 checksums of originals and joined copies.
# Uses split_file, join_file, join_files and get_checksum from this script.
# arg 1 is directory to run test in (created if missing; the scratch
#       subdirectory made inside it is removed at the end)
# Returns: 0 when every joined file matches its original, 1 otherwise
function run_test() {
  local -r megabyte=$(( 1024 * 1024 ))
  local -r hundred_megabyte=$(( 100 * megabyte ))
  local tmp_dir small_file big_file
  local -a all_files=()
  local test_success="true"
  local big_checksum big_joined_checksum big_joined2_checksum
  local small_checksum small_joined2_checksum

  echo "starting test"
  mkdir -p "${1}"
  # mktemp always creates a new, unused directory; a ${RANDOM}-based name could
  # collide with an existing entry that would then be deleted during cleanup
  if ! tmp_dir="$(mktemp -d "${1}/splitfiles-test.XXXXXX")"; then
    echo "TEST FAILED: could not create a temporary directory in: ${1}" >&2
    return 1
  fi
  echo "TMP_DIR: ${tmp_dir}"

  small_file="${tmp_dir}/small file.bin"
  echo "creating 1MB small file: ${small_file}"
  # /dev/urandom does not block; reading this much from /dev/random can stall on some systems
  head -c "${megabyte}" /dev/urandom > "${small_file}"

  big_file="${tmp_dir}/big file.bin"
  echo "creating 100MB big file: ${big_file}"
  head -c "${hundred_megabyte}" /dev/urandom > "${big_file}"

  echo "splitting small file into 2MB chunks (it will make one file that's the same as the original file)"
  split_file 2m "${small_file}"

  echo "splitting large file into 2MB chunks"
  split_file 2m "${big_file}"

  echo "putting large file back together (into ${big_file}.joined)"
  join_file "${big_file}" "${big_file}.joined"

  # joins both files a second time through the directory code path, into "*.joined2"
  join_files "${tmp_dir}" "joined2"

  echo "File list:"
  ls -alh "${tmp_dir}"
  echo ""

  all_files=( "${tmp_dir}/"* )
  echo ""
  echo "ALL_FILES_LIST: ${all_files[@]}"
  echo ""
  echo "Checksums:"
  md5sum "${all_files[@]}"
  echo ""

  big_checksum="$(get_checksum "${big_file}")"
  big_joined_checksum="$(get_checksum "${big_file}.joined")"
  big_joined2_checksum="$(get_checksum "${big_file}.joined2")"
  small_checksum="$(get_checksum "${small_file}")"
  small_joined2_checksum="$(get_checksum "${small_file}.joined2")"

  echo "BIG_FILE_CHECKSUM: ${big_checksum}"
  echo "BIG_FILE_JOINED_CHECKSUM: ${big_joined_checksum}"
  echo "BIG_FILE_JOINED2_CHECKSUM: ${big_joined2_checksum}"
  echo "SMALL_FILE_CHECKSUM: ${small_checksum}"
  echo "SMALL_FILE_JOINED2_CHECKSUM: ${small_joined2_checksum}"

  # an empty checksum means hashing itself failed; two empty strings must not count as a match
  if [ -z "${big_checksum}" ] || [ -z "${small_checksum}" ]; then
    echo "TEST FAILED: Could not compute checksums."
    test_success="false"
  fi
  if [ "${big_checksum}" != "${big_joined_checksum}" ] \
    || [ "${big_checksum}" != "${big_joined2_checksum}" ]; then
    echo "TEST FAILED: Big file checksums do not match."
    test_success="false"
  fi
  if [ "${small_checksum}" != "${small_joined2_checksum}" ]; then
    echo "TEST FAILED: Small file checksums do not match."
    test_success="false"
  fi

  if [ "${test_success}" = "true" ]; then
    echo "TEST RESULT: SUCCESS"
  else
    echo "TEST RESULT: FAILED"
  fi

  echo "finished test, cleaning up"
  # :? aborts instead of expanding to an empty path
  rm -rf -- "${tmp_dir:?}"

  [ "${test_success}" = "true" ]
}
# run_test "."

# Entry point: validate the command line and dispatch to the split / join helpers.
# arguments: the script's own arguments, either
#            split [part size] [file or directory]
#            join [file or directory]
# Exits: 1 through print_usage on invalid arguments, 1 when there is nothing to split,
#        otherwise 0 unless a split/join helper returned a non-zero status
function main() {
  local -r size_pattern='^[0-9]+[km]?$'
  local -a files=()
  local file
  local status=0

  case "${1}" in
    split)
      if [ "${#}" != "3" ]; then
        print_usage "Invalid arguments specified. ('split' mode arguments: splitfiles.sh split [part size] [file or directory])"
      elif [ ! -e "${3}" ]; then
        print_usage "Specified file or directory doesn't exist: ${3}"
      fi

      # part size must be at least one digit, optionally followed by a single 'k' or 'm';
      # this also rejects an empty size or a bare suffix
      if [[ ! "${2}" =~ ${size_pattern} ]]; then
        print_usage "Invalid part size specified: ${2} (valid examples: '1024', '1024k', or '500m')"
      fi

      if [ -d "${3}" ]; then
        for file in "${3}/"*; do
          if [ -f "${file}" ]; then
            files+=( "${file}" )
          elif [ -e "${file}" ]; then
            # sub-directories and other non-regular entries cannot be split
            echo "Skipping (not a regular file): ${file}"
          fi
          # an entry that does not exist at all is the unmatched glob pattern
          # of an empty directory; it is dropped silently
        done
      else
        files=( "${3}" )
      fi

      if [ "${#files[@]}" = "0" ]; then
        echo "No files to split were found."
        exit 1
      fi

      for file in "${files[@]}"; do
        # files ending in ".part.XX" are the output of an earlier split, not input
        if [[ "${file}" == *.part.?? ]]; then
          echo "Not splitting part: ${file}"
          continue
        fi
        split_file "${2}" "${file}" || status=1
      done
      ;;
    join)
      if [ "${#}" != "2" ]; then
        print_usage "Invalid arguments specified. ('join' mode arguments: splitfiles.sh join [file or directory])"
      fi
      if [ -d "${2}" ]; then
        join_files "${2}" "" || status=1
      else
        join_file "${2}" "${2}" || status=1
      fi
      ;;
    *)
      print_usage "Unsupported operation: ${1}"
      ;;
  esac

  exit "${status}"
}

main "$@"