#!/bin/sh
# ~/bin/.sh/grepspam
#
# Call as:
#   grepspam /path/name/of/mail/falsely/identified/with/a/spam/phrase
# Then it identifies which phrase to remove from the list of spam phrases
# $phrases.
# Analyses a mail file (path name in $1),
# (that has usually been filed by procmail in ~/mail/spam/phrases/ )
# to find which phrase[s] in $phrases caused procmail to put it there.
# Usual use is for autopsy, after an innocent mail gets mis-filed as spam.
#
# Call as:
#   grepspam /dev/null
# Then it identifies grep syntax errors in phrases.
#
# Output: one "MatchPhrase: <phrase>" line on stdout per matching phrase.
# Errors are reported on stderr with exit status 1.

# Print an error message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

innocent=$1

phrases="$HOME/.DOTS/.procmailrc.spam_shrunk"
#   built from .procmailrc.phrases.src by ~/public_html/dots/Makefile
# phrases="$phrases $HOME/.DOTS/.procmailrc.domains2block.src"
#   .procmailrc.domains2block.src is no longer appended,
#   as also no longer processed into
#   .procmailrc.spam.inc by ~jhs/public_html/dots/Makefile

# Scratch copy of all phrase lists; removed by the trap below.
phrase_list="$HOME/tmp/grepspam.$$.tmp"

if [ -z "$innocent" ]; then
  die "Error. Specify a mail file to scan EG ~/mail/spam/phrases/1"
fi

# Fail once here rather than letting grep complain once per phrase.
if [ ! -r "$innocent" ]; then
  die "Error. Cannot read mail file: $innocent"
fi

# Ensure .procmailrc.spam_shrunk is up to date.
# Best effort: a failure here does not stop the scan, but make is only
# run if the cd succeeded (never in whatever directory we started in).
( cd "$HOME/public_html/dots" && make grepspam_hook ) > /dev/null

# $phrases is deliberately unquoted: it may hold several space-separated
# path names (see the commented-out assignment above).
for i in $phrases ; do
  if [ ! -r "$i" ]; then
    die "Error. Cannot read list of spam phrases: $i"
  fi
done

# Remove the scratch file on every exit path, including interrupts.
trap 'rm -f -- "$phrase_list"' EXIT
trap 'exit 1' HUP INT TERM

# Aggregate all spam phrases, maybe later I will file separately by language,
# or import external lists etc.
# shellcheck disable=SC2086 # intentional word splitting, see above
cat $phrases > "$phrase_list" ||
  die "Error. Cannot write temporary phrase list: $phrase_list"

# Show each line from the spam phrases list that matches in the mail.
# Phrases starting with a minus, ie '-', used to make grep produce an
# error report; passing the phrase with -e avoids that here.
#
# If a syntax error is reported, such as
#   egrep: Unmatched ( or \(
# To find the error, uncomment the CheckPhrase: line, then:
#   script
#   sh -x `which grepspam` /dev/null
#   ^D
#   vi typescript

# Problem: A spam phrase in the list of
#   highest quality of rep\\|ica
# with the old ('for i in') loop immediately below, shows
# false matches on string "ica" in mail with words such as:
#   certificate, helvetica;
# although the .procmailrc.spam.inc does the right thing with
#   * highest quality of rep\|ica
# because of ~/public_html/dots/Makefile reducing \\ to \
# converting from ~/.procmailrc.spam_shrunk to ~/.procmailrc.spam.inc
#
# (The old loop ran with IFS set to a newline, so each line was one phrase.)
# for i in `cat $HOME/tmp/grepspam.$$.tmp` ; do
#   # echo "CheckPhrase: $i"
#   egrep -q -i "$i" $innocent && echo "MatchPhrase: $i"
#   # No "&& exit" as we want all matching phrases, not just the first.
# done

# New 'while' below converts \\ to \ so it doesn't cause a false match.
# That conversion is done by 'read' WITHOUT -r, so do not add -r here.
# 'IFS=' keeps leading/trailing blanks of a phrase intact.
# The '|| [ -n ... ]' also processes a last line lacking a newline.
# shellcheck disable=SC2162 # backslash processing by read is intended
while IFS= read phrase || [ -n "$phrase" ] ; do
  # printf '%s\n' "CheckPhrase: $phrase"
  # printf, not echo: some sh builtins of echo interpret backslash
  # escapes such as \b or \n inside the phrase.
  # No "&& exit" as we want all matching phrases, not just the first.
  grep -E -q -i -e "$phrase" -- "$innocent" &&
    printf '%s\n' "MatchPhrase: $phrase"
done < "$phrase_list"

# JJLATER I should switch the Makefile to use a syntax like 'for'
# further above, rather than 'while read' immediately above, as further
# above does not lose a single backslash, so I would not then need
# double backslash in .procmailrc.phrases.src

# The scratch file is removed by the EXIT trap.
# Exit 0 as before, regardless of whether the last phrase matched.
exit 0