iopsys-feed/parental-control/files/lib/parentalcontrol/sync_bundles.sh

339 lines
9.1 KiB
Bash

#!/bin/sh
# Shell helper libraries used by the functions below.
. /lib/functions.sh
. /usr/share/libubox/jshn.sh

# Fixed locations: the single-instance lock and the bundle description file.
LOCKFILE="/tmp/sync_bundles.lock"
URLBUNDLE_JSON="/etc/parentalcontrol/url_bundles.json"

# Set to 1 to mirror every log message on /dev/console.
DEBUG=0

# Log verbosity comes from UCI; an unset or empty option falls back to 1.
log_level="$(uci -q get parentalcontrol.globals.loglevel)"
: "${log_level:=1}"
# Report an error. The message always goes to syslog (tag urlfilter.sync,
# priority error); it is echoed to /dev/console as well when DEBUG is 1.
# Arguments: $* - message text
log_err() {
  local msg="$*"

  logger -t urlfilter.sync -p error "$msg"

  # Console mirroring is a debugging aid only.
  [ "${DEBUG}" -eq "1" ] || return 0
  echo "#ERR# $msg #" >/dev/console
}
# Report an informational message. It reaches syslog (tag urlfilter.sync,
# priority info) only when log_level is greater than 3; it is echoed to
# /dev/console when DEBUG is 1.
# Arguments: $* - message text
log_info() {
  local msg="$*"

  [ "${log_level}" -gt 3 ] && logger -t urlfilter.sync -p info "$msg"

  # Console mirroring is a debugging aid only.
  [ "${DEBUG}" -eq "1" ] || return 0
  echo "#INFO# $msg #" >/dev/console
}
# This script handles syncing the URL bundles.
# If a bundle is a remote file, it is downloaded and placed in bundle_dir.
bundle_path="$(uci -q get parentalcontrol.globals.bundle_path)"
if [ -z "${bundle_path}" ]; then
  # Nothing can be stored without a bundle path. `return` is only valid when
  # the file is sourced (some shells reject it in an executed script and would
  # carry on with an empty path), so fall back to `exit` in that case.
  # shellcheck disable=SC2317
  return 0 2>/dev/null || exit 0
fi

# Working locations below the configured bundle path.
stringstore_dir="${bundle_path}/stringstore"
bundle_dir="${bundle_path}/urlbundles"
bundle_sizes="${bundle_path}/bundle_sizes"
# Ensure the required directories and the size-record file exist.
# Globals:   bundle_dir, stringstore_dir, bundle_sizes (read)
# Returns:   0 when everything exists afterwards, 1 if something could not be
#            created
initialize_environment() {
  local rc=0

  mkdir -p "$bundle_dir" "$stringstore_dir" || rc=1

  # Only create the record file when it is missing; never truncate it.
  if [ ! -f "$bundle_sizes" ]; then
    touch "$bundle_sizes" || rc=1
  fi

  return "$rc"
}
# Sanitize a URL to avoid code injection: every character outside
# [a-zA-Z0-9_.:/?-] is removed.
# Arguments: $1 - raw URL
# Outputs:   the sanitized URL on stdout
sanitize_url() {
  local raw_url="$1"

  # printf instead of echo so that values such as "-n" are passed through
  # untouched; LC_ALL=C keeps the ranges limited to ASCII letters and digits.
  printf '%s\n' "$raw_url" | LC_ALL=C sed 's|[^a-zA-Z0-9_.:/?-]||g'
}
# Fetch (or read) one bundle, unpack it, reduce it to a sorted list of unique
# URLs and announce the result with a ubus event.
#
# Globals:   bundle_dir (read), bundle_sizes (appended to)
# Arguments: $1 - download URL; "file://" URLs are read locally, anything
#                 else is fetched with curl
#            $2 - bundle file name used below bundle_dir
#            $3 - expected size in bytes (empty or non-numeric counts as 0)
#            $4 - bundle name reported in the ubus event
#            $5 - file-name part of the URL, used for the temporary download
# Returns:   0 on success, 1 on any failure
update_bundle_file_from_url() {
  local download_url="$1"
  local bundle_file_name="$2"
  local bundle_file_size="$3"
  local bundle_name="$4"
  local file_name="$5"
  local max_size=$((50 * 1024 * 1024)) # 50MB in bytes

  # A missing or malformed size must not break the arithmetic below.
  case "$bundle_file_size" in
    '' | *[!0-9]*) bundle_file_size=0 ;;
  esac

  if [ "$bundle_file_size" -gt "$max_size" ]; then
    log_info "update_bundle_file_from_url: Error: File size for ${bundle_name} exceeds 50MB"
    return 1
  fi

  # Available space in 1K blocks (POSIX df format: 4th column of line 2).
  local available_memory
  available_memory=$(df -Pk "$bundle_dir" | awk 'NR == 2 { print $4 }')
  local needed_blocks=$((bundle_file_size / 1024)) # bytes -> 1K blocks
  case "$available_memory" in
    '' | *[!0-9]*)
      # df output not usable: skip the check instead of failing on it.
      ;;
    *)
      if [ "$available_memory" -le "$needed_blocks" ]; then
        log_info "Error: Not enough disk space for bundle: ${bundle_name}"
        return 1
      fi
      ;;
  esac

  # Determine the file to process. is_temp marks a file this function
  # downloaded itself and may therefore move or delete.
  local file_path
  local is_temp=0
  case "$download_url" in
    file://*)
      file_path=${download_url#file://}
      ;;
    *)
      # Random delay (0-5s) before starting the download
      local delay=$((RANDOM % 6))
      log_info "update_bundle_file_from_url: Waiting ${delay}s before downloading..."
      sleep "$delay"

      # Up to three attempts with exponential backoff (2s, 4s) plus 0-2s
      # jitter between them.
      local temp_file="${bundle_dir}/tmp_${file_name}"
      local attempt=1
      local success=0
      local backoff_base=2
      local backoff
      while [ "$attempt" -le 3 ]; do
        # -f: treat HTTP errors as failures so that an error page is never
        # stored as a bundle.
        if curl -fs -o "$temp_file" "$download_url"; then
          log_info "Download successful for $download_url"
          success=1
          break
        fi
        log_info "update_bundle_file_from_url: Download failed. Retrying $attempt ..."
        # No point in waiting after the final attempt.
        if [ "$attempt" -lt 3 ]; then
          backoff=$((backoff_base + (RANDOM % 3)))
          sleep "$backoff"
          backoff_base=$((backoff_base * 2))
        fi
        attempt=$((attempt + 1))
      done

      if [ "$success" -ne 1 ]; then
        log_info "update_bundle_file_from_url: Failed to download bundle: ${bundle_name}"
        rm -f "$temp_file"
        return 1
      fi
      file_path="$temp_file"
      is_temp=1
      ;;
  esac

  # Unpack (or take over) the file as $final_path.
  # rc: 0 = ok, 1 = decompression failed, 2 = move/copy failed.
  local final_path="${bundle_dir}/${bundle_file_name}"
  local rc=0
  case "$file_path" in
    *.xz)
      xz -dc "$file_path" > "$final_path" || rc=1
      ;;
    *.gz)
      gzip -dc "$file_path" > "$final_path" || rc=1
      ;;
    *)
      # A download is simply renamed; a local source is copied so that the
      # configured file stays in place.
      if [ "$is_temp" -eq 1 ]; then
        mv "$file_path" "$final_path" || rc=2
      else
        cp "$file_path" "$final_path" || rc=2
      fi
      ;;
  esac

  # Only the temporary download may be deleted, never a local source file.
  if [ "$is_temp" -eq 1 ]; then
    rm -f "$file_path"
  fi

  if [ "$rc" -ne 0 ]; then
    if [ "$rc" -eq 1 ]; then
      log_info "update_bundle_file_from_url: Decompression failed."
    else
      log_info "update_bundle_file_from_url: Failed to store bundle: ${bundle_name}"
    fi
    rm -f "$final_path"
    return 1
  fi

  # The file has lines of the format: 0.0.0.0 www.facebook.com
  # Keep only the last field (the URL) and remove duplicates. The list is
  # written to a temporary name first so that an existing list is replaced
  # only by a complete one.
  local processed_final_path="${final_path}_urls"
  local processed_tmp="${processed_final_path}.tmp"
  if awk '{print $NF}' "$final_path" | sort -u > "$processed_tmp" &&
    mv "$processed_tmp" "$processed_final_path"; then
    # delete unprocessed file
    rm -f "$final_path"
  else
    log_info "update_bundle_file_from_url: Failed to process bundle: ${bundle_name}"
    rm -f "$final_path" "$processed_tmp"
    return 1
  fi

  # Update the bundle size and send ubus event
  echo "$bundle_file_name $bundle_file_size" >> "$bundle_sizes"
  ubus send "parentalcontrol.bundle.update" "{\"bundle_file_path\":\"${processed_final_path}\",\"bundle_name\":\"${bundle_name}\"}"
  return 0
}
# Remove every record from $bundle_sizes whose first field is exactly $1.
_bundle_sizes_drop() {
  local sizes_tmp="${bundle_sizes}.tmp"

  [ -f "$bundle_sizes" ] || return 0
  # The key is passed through the environment so that it is compared as a
  # plain string, not as a pattern.
  if drop_key="$1" awk '$1 != ENVIRON["drop_key"]' "$bundle_sizes" > "$sizes_tmp"; then
    mv "$sizes_tmp" "$bundle_sizes"
  else
    rm -f "$sizes_tmp"
    return 1
  fi
}

# Decide whether one configured bundle has to be (re)synchronised and do so.
#
# The size recorded in $bundle_sizes is compared with the current size
# (Content-Length of a HEAD request for http/https, file size for file://).
# An unchanged size means nothing to do. A local uncompressed file is
# announced in place; everything else is handed to
# update_bundle_file_from_url.
#
# Globals:   bundle_sizes (read/written), bundle_dir (read)
# Arguments: $1 - raw download URL (http://, https:// or file://)
#            $2 - bundle name reported in the ubus event
# Returns:   0 when the bundle is unchanged or was announced in place, 1 for
#            an unsupported URL or an unreachable server, otherwise the status
#            of update_bundle_file_from_url
handle_download_url() {
  local raw_download_url="$1"
  local bundle_name="$2"
  local sanitized_url
  sanitized_url=$(sanitize_url "$raw_download_url")
  local file_name="${sanitized_url##*/}" # Get everything after the last '/'
  local bundle_file_name="${file_name}.urlbundle"
  local file_path="${sanitized_url#file://}"
  local is_remote

  case "$sanitized_url" in
    http://* | https://*) is_remote=1 ;;
    file://*) is_remote=0 ;;
    *)
      log_info "Error: Unsupported URL format for ${bundle_file_name}"
      return 1
      ;;
  esac

  # Size recorded by the previous sync; exact key match, first record wins.
  local previous_bundle_size=""
  if [ -f "$bundle_sizes" ]; then
    previous_bundle_size=$(lookup_key="$bundle_file_name" \
      awk '$1 == ENVIRON["lookup_key"] { print $2; exit }' "$bundle_sizes")
  fi

  local bundle_file_size
  if [ "$is_remote" -eq 1 ]; then
    # Remote file: take the size from the response headers.
    local bundle_file_header curl_rc
    bundle_file_header="$(curl -Is --max-time 30 "$sanitized_url" 2>/var/log/urlfilter_curl_err.log)"
    curl_rc=$?
    case $curl_rc in
      0)
        # Success
        ;;
      6 | 7 | 28 | 35 | 52 | 55 | 56)
        log_info "handle_download_url: URL not reachable (curl rc=$curl_rc): ${sanitized_url}"
        return 1
        ;;
      *)
        log_info "handle_download_url: unexpected curl rc=$curl_rc for ${sanitized_url}"
        ;;
    esac
    # Header lines end in CR LF; strip the CR, match the header name exactly
    # and let the last occurrence win.
    bundle_file_size="$(printf '%s\n' "$bundle_file_header" | tr -d '\r' |
      awk -F: 'tolower($1) == "content-length" { gsub(/[ \t]/, "", $2); size = $2 }
               END { print size }')"
  else
    # Local file: take the size from the filesystem.
    bundle_file_size=$(ls -l "$file_path" 2>/dev/null | awk '{print $5}')
  fi
  # Anything that is not a plain number counts as "size unknown".
  case "$bundle_file_size" in
    '' | *[!0-9]*) bundle_file_size=0 ;;
  esac

  if [ -n "$previous_bundle_size" ] && [ "$bundle_file_size" -eq "$previous_bundle_size" ]; then
    return 0
  fi

  if [ "$is_remote" -eq 0 ]; then
    case "$sanitized_url" in
      *.xz | *.gz) ;;
      *)
        # A local uncompressed file is neither processed nor moved; only the
        # size record is refreshed and the file is announced where it is.
        _bundle_sizes_drop "$bundle_file_name"
        echo "$bundle_file_name $bundle_file_size" >> "$bundle_sizes"
        ubus send "parentalcontrol.bundle.update" "{\"bundle_file_path\":\"${file_path}\",\"bundle_name\":\"${bundle_name}\"}"
        return 0
        ;;
    esac
  fi

  # Remove existing entries
  if [ -n "$previous_bundle_size" ]; then
    _bundle_sizes_drop "$bundle_file_name"
    rm -f "${bundle_dir}/${bundle_file_name}"
  fi

  update_bundle_file_from_url "$sanitized_url" "$bundle_file_name" "$bundle_file_size" "$bundle_name" "$file_name"
  return $?
}
# Delete files in a directory that no longer belong to an enabled bundle.
#
# The file-name part of every enabled bundle URL in $URLBUNDLE_JSON is
# collected. A regular file in the directory is kept when its name, with the
# last ".suffix" removed, occurs in that collection (substring match);
# otherwise the file and its size record are removed.
#
# Globals:   URLBUNDLE_JSON (read), bundle_sizes (read/written)
# Arguments: $1 - directory to clean
# Returns:   1 when $1 is not a directory
cleanup_bundle_files() {
  local dir="$1"
  [ -d "$dir" ] || return 1

  # Collect the file names of all enabled bundles from the JSON description.
  local urls=""
  get_download_url() {
    local enable url name
    json_select "${2}"
    json_get_var name name
    json_get_var url url
    json_get_var enable enable
    enable="${enable:-1}"
    if [ "${enable}" -eq 0 ]; then
      log_info "get_download_url: Skipping bundle ${name} not enabled"
      json_select ..
      return 0
    fi
    # Stored files are named after the sanitized URL, so sanitize here too;
    # then keep everything after the last '/'.
    url="$(sanitize_url "$url")"
    url="${url##*/}"
    urls="$urls $url"
    json_select ..
  }
  json_init
  json_load_file "${URLBUNDLE_JSON}"
  json_for_each_item get_download_url "urlBundles"

  # Loop through all files in the directory
  local file base_name name sizes_tmp
  for file in "$dir"/*; do
    [ -f "$file" ] || continue # Skip non-files
    base_name="${file##*/}"
    name="${base_name%.*}" # Removes the last dot and suffix

    # Fixed-string match: a '.' in the name must not act as a wildcard.
    if printf '%s\n' "$urls" | grep -qF -- "$name"; then
      continue
    fi

    rm -f -- "$file"

    # Drop the size record as well. Records are keyed "<file>.urlbundle";
    # the bare name is removed too, as before.
    [ -f "$bundle_sizes" ] || continue
    sizes_tmp="${bundle_sizes}.tmp"
    if key_a="$name" key_b="${name}.urlbundle" \
      awk '$1 != ENVIRON["key_a"] && $1 != ENVIRON["key_b"]' "$bundle_sizes" > "$sizes_tmp"; then
      mv "$sizes_tmp" "$bundle_sizes"
    else
      rm -f "$sizes_tmp"
    fi
  done
}
# Drop size records of bundles that have no complete string store.
#
# For every record in $bundle_sizes, the part of its key before the first '.'
# is used as a file-name prefix in $stringstore_dir. The record is kept only
# when files with that prefix include both a name containing ".store" and a
# name containing "cmph". Otherwise exactly that record is removed.
#
# Globals:   bundle_sizes (read/written), stringstore_dir (read)
cleanup_bundle_sizes() {
  local keys

  [ -f "$bundle_sizes" ] || return 0

  # Snapshot the keys first: the file is rewritten while iterating.
  keys="$(awk 'NF { print $1 }' "$bundle_sizes")"
  [ -n "$keys" ] || return 0

  printf '%s\n' "$keys" | while IFS= read -r key; do
    name="${key%%.*}"
    has_store=0
    has_cmph=0
    for store_file in "${stringstore_dir}/${name}"*; do
      [ -e "$store_file" ] || continue # unmatched glob stays literal
      case "${store_file##*/}" in
        *.store*) has_store=1 ;;
      esac
      case "${store_file##*/}" in
        *cmph*) has_cmph=1 ;;
      esac
    done

    if [ "$has_store" -eq 1 ] && [ "$has_cmph" -eq 1 ]; then
      continue
    fi

    # Remove only this record; other keys that merely contain the same text
    # are left alone.
    sizes_tmp="${bundle_sizes}.tmp"
    if drop_key="$key" awk '$1 != ENVIRON["drop_key"]' "$bundle_sizes" > "$sizes_tmp"; then
      mv "$sizes_tmp" "$bundle_sizes"
    else
      rm -f "$sizes_tmp"
    fi
  done
}
# Main handler for all profile URL bundles.
#
# Does nothing unless parentalcontrol.globals.urlfilter is switched on. It
# then prepares the working directories, removes leftovers of bundles that
# are no longer configured and, if parental control itself is enabled,
# synchronises every enabled bundle listed in $URLBUNDLE_JSON.
#
# Globals:   bundle_dir, stringstore_dir, URLBUNDLE_JSON (read)
# Returns:   0 when the feature is disabled, otherwise the status of the
#            final bundle iteration
handle_filter_for_bundles() {
  local urlfilter enable

  urlfilter="$(uci -q get parentalcontrol.globals.urlfilter)"
  # If urlfilter is not enabled, return. The value is matched as a string so
  # that an unset option or a word such as "false" counts as disabled instead
  # of tripping a numeric comparison.
  case "${urlfilter}" in
    1 | on | true | yes | enabled) ;;
    *)
      log_info "urlfilter feature not enabled"
      return 0
      ;;
  esac

  initialize_environment
  cleanup_bundle_files "$bundle_dir"
  cleanup_bundle_files "$stringstore_dir"
  cleanup_bundle_sizes

  config_load parentalcontrol
  config_get_bool enable globals enable 0
  if [ "${enable}" -eq 0 ]; then
    # Parental control is disabled
    log_info "parental-control feature not enabled"
    return 0
  fi

  # Callback for json_for_each_item: sync one bundle unless it is disabled.
  check_bundle_exists() {
    local enable url name
    json_select "${2}"
    json_get_var name name
    json_get_var url url
    json_get_var enable enable
    enable="${enable:-1}"
    if [ "${enable}" -eq 0 ]; then
      log_info "check_bundle_exists: Skipping bundle ${name} not enabled"
      json_select ..
      return 0
    fi
    handle_download_url "${url}" "${name}"
    json_select ..
  }
  json_init
  json_load_file "${URLBUNDLE_JSON}"
  json_for_each_item check_bundle_exists "urlBundles"
}
# Single-instance guard: file descriptor 200 is opened on the lock file and
# an exclusive lock is requested without waiting.
exec 200>"$LOCKFILE"
if ! flock -n 200; then
  # The lock is held elsewhere, so another instance is already running.
  log_info "sync_bundles.sh is already running, exiting."
  exit 1
fi

handle_filter_for_bundles