#!/bin/bash
### Copyright 1999-2024. WebPros International GmbH. All rights reserved.

# Run external tools with the plain C locale and without user-level grep
# defaults.
export LC_ALL="C"
unset GREP_OPTIONS

# Product installation root; mail_handlers_control is looked up below it.
PRODUCT_ROOT_D="/usr/local/psa"

# Training utility that is run for every mailbox.
learn_bin="/usr/bin/sa-learn"

# Mail storage layout: $maildir/<domain>/<account>/$maildir_subdir
maildir="/var/qmail/mailnames"
maildir_subdir="Maildir"

# Working directory of this script (message lists and resume markers).
train_subdir=".spamtrain"
train_confdir="${maildir}/${train_subdir}"

# File that receives $cur_time at the end of a training run.
stamp_file="${train_confdir}/last_update"

# Folder name patterns treated as spam folders, and the name of the
# per-account directory passed to sa-learn as --dbpath.
spam_dirs=''
spam_subdirs="*.Spam*"
spamassassin_subdir=".spamassassin"

# Folder name patterns that are never used as a source of ham.
ham_dirs=''
skip_subdirs=".Drafts .Sent .Trash .spamassassin @attachments courierimap* .sent-mail"

# Messages larger than msg_size_max (in bytes) are not used for training.
msg_size_max="256000"

# Spam messages older than delete_interval (in days) are removed from the
# spam folders.
delete_interval="30"

# Account used to run sa-learn and to own the per-account database directory.
owner="popuser"
group="popuser"
system="$(uname -s)"

# On Linux, su is told explicitly which shell to use for the command.
case "$system" in
	Linux) su_opts="-s /bin/sh" ;;
esac

# Work lists filled in lazily by the get_*_list helpers.
accounts=''
domains=''
enabled_mail_addresses="UNDEFINED"

# Start of this run and the cut-off before which spam counts as expired
# (both in seconds since the epoch).
cur_time="$(date +'%s')"
expired_time="$(( cur_time - delete_interval * 3600 * 24 ))"

#------------------------------------------------------------------------------

# Fills the global $domains with the names of all directories located
# directly under $maildir (one name per line), leaving out the $train_subdir
# working directory. Does nothing when $domains is already populated.
get_domains_list()
{
	[ -z "$domains" ] || return 0

	# The expansions are quoted so that a mail root containing blanks or
	# glob characters reaches find as one literal argument.
	domains="$(find "$maildir/" -maxdepth 1 -mindepth 1 -type d \
		! -name "$train_subdir" -exec basename {} \;)"
}

# Fills the global $accounts with the names of all directories located
# directly under $maildir/<domain> (one name per line).
# Arguments: $1 - domain name; print_err is called when it is empty
get_accounts_list()
{
	local domain="$1"

	if [ -z "$domain" ]; then
		print_err "Domain not defined for searching mail accounts"
	fi

	# Quoted so that blanks or glob characters in the path cannot split or
	# expand the argument handed to find.
	accounts="$(find "$maildir/$domain" -maxdepth 1 -mindepth 1 -type d \
		-exec basename {} \;)"
}

# Resolves, once per run, the addresses whose "spam" mail handler is enabled
# and stores them (one address per line) in the global
# $enabled_mail_addresses. The value "UNDEFINED" marks the list as not yet
# loaded. print_err is called when the handler list cannot be obtained or
# parsed.
get_enabled_mail_addresses_list()
{
	if [ "$enabled_mail_addresses" != "UNDEFINED" ]; then
		return 0
	fi

	# Python filter: keep the address of every enabled handler named "spam".
	local py_filter='import sys, json; \
		handlers = json.load(sys.stdin)["handlers"]; \
		print("\n".join( \
			h["address"] for h in handlers if h["name"] == "spam" and h["enabled"] \
		));'

	# pipefail makes a failure of either side of the pipe visible here.
	enabled_mail_addresses="$(
		set -o pipefail
		"$PRODUCT_ROOT_D/admin/sbin/mail_handlers_control" --list --json |
			"/usr/bin/python3" -c "$py_filter"
	)" || print_err "Cannot get list of mailboxes with enabled spam filter"
}

# Collects the spam folders of one mailbox into the global $spam_dirs.
# A spam folder is a directory directly under
# $maildir/<domain>/<account>/$maildir_subdir whose name matches one of the
# whitespace-separated patterns in $spam_subdirs (tmp, new and cur are never
# reported).
# Arguments: $1 - domain, $2 - account; print_err is called when either is
#            empty
# Result:    $spam_dirs holds every path followed by "%", entries separated
#            by one blank ("dir1% dir2%"); empty when nothing matched
get_spamdirs_list()
{
	local domain="$1"
	local account="$2"
	local pattern
	local found_dir
	local joined=''
	local -a patterns=()
	local -a name_expr=()

	if [ -z "$domain" ] || [ -z "$account" ]; then
		print_err "Domain or account not defined for searching spam dirs"
	fi

	spam_dirs=''

	# Split the pattern list on whitespace only. Unlike an unquoted
	# expansion, read never matches the patterns against files of the
	# current directory.
	IFS=$' \t\n' read -r -d '' -a patterns <<< "$spam_subdirs"
	[ "${#patterns[@]}" -gt 0 ] || return 0

	for pattern in "${patterns[@]}"; do
		if [ "${#name_expr[@]}" -gt 0 ]; then
			name_expr+=( -o )
		fi
		name_expr+=( -name "$pattern" )
	done

	# The alternatives are grouped in parentheses so that the remaining
	# tests and the output action apply to all of them, not only to the
	# last one.
	while IFS= read -r -d '' found_dir; do
		joined="${joined:+$joined }${found_dir}%"
	done < <(find "$maildir/$domain/$account/$maildir_subdir" \
			-maxdepth 1 -mindepth 1 -type d \
			'(' "${name_expr[@]}" ')' \
			! -name 'tmp' ! -name 'new' ! -name 'cur' -print0)

	spam_dirs="$joined"
}

# Collects the ham folders of one mailbox into the global $ham_dirs.
# A ham folder is a directory directly under
# $maildir/<domain>/<account>/$maildir_subdir whose name matches none of the
# whitespace-separated patterns in $spam_subdirs and $skip_subdirs (tmp, new
# and cur are never reported).
# Arguments: $1 - domain, $2 - account; print_err is called when either is
#            empty
# Result:    $ham_dirs always starts with ".%" (the mailbox root itself),
#            followed by every path with a trailing "%", entries separated
#            by one blank (".%dir1% dir2%")
get_hamdirs_list()
{
	local domain="$1"
	local account="$2"
	local pattern
	local found_dir
	local joined=''
	local -a patterns=()
	local -a exclude_expr=()

	if [ -z "$domain" ] || [ -z "$account" ]; then
		print_err "Domain or account not defined for searching ham dirs"
	fi

	# Split the pattern lists on whitespace only. Unlike an unquoted
	# expansion, read never matches the patterns against files of the
	# current directory.
	IFS=$' \t\n' read -r -d '' -a patterns <<< "$spam_subdirs $skip_subdirs"

	for pattern in "${patterns[@]}"; do
		exclude_expr+=( ! -name "$pattern" )
	done

	while IFS= read -r -d '' found_dir; do
		joined="${joined:+$joined }${found_dir}%"
	done < <(find "$maildir/$domain/$account/$maildir_subdir" \
			-maxdepth 1 -mindepth 1 -type d \
			"${exclude_expr[@]}" \
			! -name 'tmp' ! -name 'new' ! -name 'cur' -print0)

	ham_dirs=".%$joined"
}

# Feeds the messages of the given folders of one mailbox to sa-learn and
# removes expired spam.
# Arguments: $1 - "spam" or "ham"; anything else ends in print_err
#            $2 - mailbox directory ($maildir/<domain>/<account>)
#            $3 - folder list, entries separated by "%" or newlines; only the
#                 last path component of every entry is used, and it is
#                 looked up under <mailbox>/$maildir_subdir
# Behavior:
#  - ham is read from the "cur" subdirectory only, spam from cur, new and tmp;
#  - the text before the first "." of a file name is taken as the message
#    time (seconds since the epoch) when it is purely numeric;
#  - spam with a message time older than $expired_time is deleted after
#    learning, whatever its size;
#  - messages larger than $msg_size_max bytes are not learned;
#  - when $stamp_file exists, only messages with a message time newer than
#    the stamp are learned.
spam_learn()
{
	local type="$1"
	local path="$2"
	local folders="$3"
	local stamp=''
	local learn_messages="$train_confdir/learn_msgs_$cur_time"
	local spam_messages="$train_confdir/spam_msgs_$cur_time"
	local folder dir msg msg_file msg_date msg_size
	local -a folder_list=()
	local -a subdirs=( cur )

	[ "$type" = "ham" ] || [ "$type" = "spam" ] ||
		print_err "Wrong type of content spam/ham: $type"

	if [ "$type" = "spam" ]; then
		subdirs+=( new tmp )
	fi

	if [ -f "$stamp_file" ]; then
		stamp="$(cat "$stamp_file")"
	fi

	# Split the folder list on "%" and newlines. IFS is changed for this one
	# read only, so the global IFS stays untouched and no pathname expansion
	# takes place.
	IFS=$'%\n' read -r -d '' -a folder_list <<< "$folders"

	touch "$learn_messages"
	touch "$spam_messages"

	for folder in "${folder_list[@]}"; do
		# entries after the first one carry a leading blank ("a% b% c%")
		folder="${folder#"${folder%%[![:space:]]*}"}"
		[ -n "$folder" ] || continue
		folder="$(basename -- "$folder")"

		for dir in "${subdirs[@]}"; do
			while IFS= read -r -d '' msg; do
				msg_file="${msg##*/}"
				msg_date="${msg_file%%.*}"

				# Only a purely numeric prefix is usable as a time; anything
				# else would make the numeric tests below fail with an error.
				case "$msg_date" in
					''|*[!0-9]*) msg_date='' ;;
				esac

				# Spam messages to remove from mailbox after training
				if [ "$type" = "spam" ] && [ -n "$msg_date" ] &&
						[ "$msg_date" -lt "$expired_time" ]; then
					printf '%s\n' "$msg" >> "$spam_messages"
				fi

				# skip messages that vanished meanwhile or exceed the size limit
				msg_size="$(wc -c < "$msg")" || continue
				if [ "$msg_size" -gt "$msg_size_max" ]; then
					continue
				fi

				if [ -n "$stamp" ]; then
					# without a usable time the message cannot be newer
					# than the stamp
					if [ -n "$msg_date" ] && [ "$msg_date" -gt "$stamp" ]; then
						printf '%s\n' "$msg" >> "$learn_messages"
					fi
					continue
				fi

				printf '%s\n' "$msg" >> "$learn_messages"
			done < <(find "$path/$maildir_subdir/$folder/$dir" -type f -print0)
		done
	done

	# $su_opts is left unquoted on purpose: it may hold several su arguments
	su - "$owner" $su_opts -c "$learn_bin --$type --no-sync -L --dbpath $path/$spamassassin_subdir -f $learn_messages" < /dev/null

	# One file name per line; handing them over NUL-separated keeps names
	# with blanks or quotes intact.
	tr '\n' '\0' < "$spam_messages" | xargs -0 rm -f --

	su - "$owner" $su_opts -c "$learn_bin --sync --dbpath $path/$spamassassin_subdir" < /dev/null

	rm -f -- "$learn_messages" "$spam_messages"
}

# Runs the training for every mailbox that has the spam filter enabled.
#
# Progress is kept in two marker files below $train_confdir: "domain" holds
# the domain that is being processed, "account" the last account that was
# finished. When both exist at start-up (an earlier run did not reach its
# end), all domains listed before the saved domain are skipped and the run
# continues with the saved domain and every domain after it. When the saved
# domain no longer exists, the whole list is processed from the beginning.
# At the end the markers are removed and $cur_time is written to $stamp_file.
spam_train()
{
	local cur_domain=''
	local cur_account=''
	local resume_pending=''
	local found=''
	local domain account sa_dir
	local -a domain_list=()
	local -a account_list=()
	local domain_file="$train_confdir/domain"
	local account_file="$train_confdir/account"

	if [ -f "$domain_file" ] && [ -f "$account_file" ]; then
		cur_domain="$(cat "$domain_file")"
		cur_account="$(cat "$account_file")"
	fi

	# begin from previous point if the training was stopped or killed before..
	if [ -n "$cur_domain" ] && [ -n "$cur_account" ]; then
		resume_pending=1
	fi

	get_enabled_mail_addresses_list
	get_domains_list

	# split the list on whitespace without pathname expansion
	IFS=$' \t\n' read -r -d '' -a domain_list <<< "$domains"

	for domain in "${domain_list[@]}"; do
		# Skip ahead to the saved domain. Once it has been reached, the flag
		# is cleared so that all following domains are processed as well.
		if [ -n "$resume_pending" ]; then
			[ "$domain" = "$cur_domain" ] || continue
			resume_pending=''
		fi

		found=1

		get_accounts_list "$domain"

		printf '%s\n' "$domain" > "$domain_file"

		account_list=()
		IFS=$' \t\n' read -r -d '' -a account_list <<< "$accounts"

		for account in "${account_list[@]}"; do
			# only mailboxes with an enabled spam filter are trained
			grep -qxF -- "${account}@${domain}" <<< "$enabled_mail_addresses" || continue

			get_spamdirs_list "$domain" "$account"
			get_hamdirs_list "$domain" "$account"

			sa_dir="$maildir/$domain/$account/$spamassassin_subdir"
			if [ ! -d "$sa_dir" ]; then
				mkdir "$sa_dir" ||
					print_err "Unable to create spamassassin subdir for ${account}@${domain}"

				chmod 700 "$sa_dir"
				chown "$owner:$group" "$sa_dir"
			fi

			# ham is learned only for mailboxes that have a spam folder
			if [ -n "$spam_dirs" ]; then
				spam_learn "spam" "$maildir/$domain/$account" "$spam_dirs"
				spam_learn "ham" "$maildir/$domain/$account" "$ham_dirs"
			fi

			printf '%s\n' "$account" > "$account_file"
		done
	done

	# remove reference on domain/account
	# if checking was not killed/stopped by timeout etc..
	rm -f -- "$domain_file" "$account_file"

	# Nothing was processed although domains exist: the saved domain was
	# removed after the last training. Start over without a resume point
	# (the markers are gone by now, so this recurses at most once).
	if [ -z "$found" ] && [ -n "$domains" ]; then
		spam_train
	fi

	printf '%s\n' "$cur_time" > "$stamp_file"
}

# Writes all arguments, joined by blanks and prefixed with "ERROR: ", to
# standard output and terminates the script with exit status 1.
print_err()
{
	printf '%s\n' "ERROR: $*"
	exit 1
}

#------------------------------------------------------------------------------

# Sanity checks that guard against a broken configuration.
[ -n "$maildir" ] && [ -n "$learn_bin" ] ||
	print_err "Some constants are not defined."

# The working directory is created on first use.
[ -d "$train_confdir" ] || mkdir "$train_confdir" ||
	print_err "Unable to create config dir: $train_confdir"

# Without an executable sa-learn there is nothing to do; this is not an error.
if ! [ -x "$learn_bin" ]; then
	echo "Spamassassin not found"
	exit 0
fi

spam_train


