procmail
[Top] [All Lists]

Re: Dealing with duplicate messages

1997-07-13 13:21:00
Eli the Bearded writes on 12 July 1997 at 20:16
[...]
(I want a very large cache because I get maybe 3500 messages a week
passing through this rc file and I want to cache at least a full week.)
[...]
Also has anyone written a program that works just like 'formail -D'
for arbitrary fingerprints extracted from text? I think I could get

Here's a copy of a program I call "expired"; the intent is for it to
work similarly to "formail -D", except using times rather than a file
size.  It can also cache arbitrary strings.

Here are a few sample usages:

:0Wh
* ^TOsupport-admin
* ^FROM_MAILER
* ^X-Mailer: Lotus Notes via PostalUnion/SMTP for Windows NT
* ? expired -t4h .postal-union.cache "$SUBJECT"
/dev/null

# this tries to mimic the above exactly - thus the use of the
# .vacation.msg file which already contains some mail headers
# | expired ${HOME}/.vacation.cache "${SENDER}"
# | formail -rD 8192 ${HOME}/.vacation.formail
:0 Whc:${HOME}/vacation.lock
* $${VACATION}^0
* !^FROM_DAEMON
* $!${VACA_SUBJECT}
* $!^X-Loop: ${UNIQUE_NAME}
* ? test -s ${HOME}/.vacation.msg -a -r ${HOME}/.vacation.msg
| expired ${HOME}/.vacation.cache "${SENDER}"
:0 ehc         # if the name was not in the cache
{
  :0fi
  | cat ${HOME}/.vacation.msg | \
    formail -I "To: ${SENDER}" -I "Precedence: junk" -A "X-Loop: ${UNIQUE_NAME}"

  :0
  ! -oi -t
}

   Dan
------------------- message is author's opinion only ------------------
J. Daniel Smith <DanS(_at_)bristol(_dot_)com>        
http://www.bristol.com/~DanS
Bristol Technology B.V.                   +31 33 450 50 50, ...51 (FAX)
Amersfoort, The Netherlands               {info,jobs}(_at_)bristol(_dot_)com
-----
/* determine if a given string is expired using "idcache" as the
   reference file.  Return success if the string is still active,
   failure otherwise.  "active" means hasn't expired (or not commented
   out).

   An optional time parameter determines how long the given entry
   should be active if it is not found; the default is one week.

   expired [-tN[smhdw]] idcache string

   This program is intended to be used with procmail/formail.  The
   return codes match those of "formail -D".  Usage with procmail
   would be
          SHELL=/bin/sh    # for other shells, this might need adjustment

          :0 Whc: vacation.lock
          * !^FROM_DAEMON
          * !^X-Loop: your(_at_)own(_dot_)mail(_dot_)address
          | formail -rtzx To: | expired vacation.cache

            :0 ehc         # if the name was not in the cache
            | (formail -rA"Precedence: junk" \
                 -A"X-Loop: your(_at_)own(_dot_)mail(_dot_)address" ; \
               echo "I received your mail,"; \
               echo "but I won't be back until Monday."; \
               echo "-- "; cat $HOME/.signature \
              ) | $SENDMAIL -oi -t

   Here is a KSH script that can be used for testing

#!/bin/ksh

expired=${HOME}/tmp/c/expired

function do_expired 
{
   arg=$1
   file=$2
   str=$3

   $expired $arg "${file}.expired" "$str"
   echo "$str -> $?"

   echo "Message-Id: <$str>" | formail -D 99999 "${file}.formail"
   echo "$str -> $?"
}

do_expired -t0s cache "always"
do_expired -t1s cache "one second"

do_expired -t1m cache "one minute"
do_expired -t1h cache "one hour"
do_expired -t1d cache "one day"
do_expired "" cache "default"
do_expired -t365d cache "one year"
do_expired -t101587w cache "max"
do_expired -tNEVER cache "never"

*/


#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define INACTIVE EXIT_FAILURE
#define ACTIVE EXIT_SUCCESS

/* Read one line from fp into buf, dropping the trailing newline.
 *
 * Returns the number of characters stored (0 for an empty line), or EOF
 * when the stream is exhausted (or a read error occurs) before any
 * character of a new line was read.  Whenever a count is returned buf is
 * NUL-terminated -- including for a final line that has no newline.
 *
 * The signature carries no buffer size, so buf must provide at least
 * GETLINE_BUF_SIZE bytes; that is the size of the line buffers declared
 * in main().  Longer lines are truncated: the excess characters are read
 * and discarded so that the next call starts on the following line.
 */
static int getline(FILE *fp, char *buf)
{
    enum { GETLINE_BUF_SIZE = 2048 };
    int i = 0;
    int ch;

    while (((ch = fgetc(fp)) != EOF) && (ch != '\n'))
    {
        /* always keep one byte free for the terminator */
        if (i < GETLINE_BUF_SIZE - 1)
          buf[i++] = (char)ch;
    }

    if ((ch == EOF) && (i == 0))
      return(EOF);

    buf[i] = '\0';
    return(i);
} /* getline() */

/* Timestamp text stored for an entry that never expires. */
static const char infinite_time_str[] = "99999999999999";

/* Format *timer as the 14-character timestamp used in the cache file.
 *
 * The layout is YYYYMMDDhhmmss built from the raw struct tm members of
 * the local time: the year field is tm_year (years since 1900) and the
 * month field is tm_mon (0-11).  strtotime() reads the same raw values
 * back, so the text round-trips even though it is not a calendar date.
 *
 * (time_t)-1 is the "never expires" marker and yields
 * infinite_time_str.  A time that localtime() cannot convert is
 * reported the same way instead of dereferencing a NULL pointer.
 *
 * The result points either at infinite_time_str or at a static buffer
 * that the next call overwrites.
 */
static const char *timetostr(const time_t *timer)
{
    static char buf[80];
    const struct tm *now;

    if (*timer == (time_t)-1)
      return infinite_time_str;

    now = localtime(timer);
    if (now == NULL)
      return infinite_time_str;

    sprintf(buf, "%04d%02d%02d%02d%02d%02d", /* 14 characters */
            now->tm_year, now->tm_mon, now->tm_mday,
            now->tm_hour, now->tm_min, now->tm_sec);

    return buf;
}

/* Convert a timestamp written by timetostr() back into a time_t.
 *
 * timestr must be exactly 14 decimal digits laid out as
 * YYYYMMDDhhmmss, where the fields hold raw struct tm values (tm_year,
 * tm_mon, tm_mday, tm_hour, tm_min, tm_sec).
 *
 * Returns (time_t)-1 -- which main() treats as "never expires" -- for
 * a NULL pointer, a string of the wrong length, a string containing a
 * non-digit, or infinite_time_str.  Otherwise returns the result of
 * mktime(), which is itself (time_t)-1 if the time is unrepresentable.
 */
static time_t strtotime(const char *timestr)
{
    static const int width[6] = {4, 2, 2, 2, 2, 2};
    struct tm timeptr;
    int field[6];
    const char *cp = timestr;
    int i;

    if ((timestr == NULL) || (strlen(timestr) != 14))
      return (time_t)-1;

    if (strcmp(timestr, infinite_time_str) == 0)
      return (time_t)-1;

    for (i = 0; i < 6; i++)
    {
        int value = 0;
        int digit;

        for (digit = 0; digit < width[i]; digit++, cp++)
        {
            if (!isdigit((unsigned char)*cp))
              return (time_t)-1;
            value = (value * 10) + (*cp - '0');
        }
        field[i] = value;
    }

    /* Start from an all-zero struct so mktime() never reads an
       indeterminate member. */
    memset(&timeptr, 0, sizeof timeptr);
    timeptr.tm_year = field[0];
    timeptr.tm_mon  = field[1];
    timeptr.tm_mday = field[2];
    timeptr.tm_hour = field[3];
    timeptr.tm_min  = field[4];
    timeptr.tm_sec  = field[5];
    timeptr.tm_isdst = -1; /* let mktime() work out whether DST applies */

    return mktime(&timeptr);
}

/* Write the cache-file timestamp for *timer, as produced by
   timetostr(), to fp.  Nothing is written after the timestamp (no
   separator, no newline). */
static void fprintf_time(FILE *fp, const time_t *timer)
{
    const char *stamp = timetostr(timer);

    fprintf(fp, "%s", stamp);
} /* fprintf_time() */

/* Write one cache line to fp at its current position: the timestamp
   for *timer, a single space, str, and a newline. */
static void write_cache_entry(FILE *fp, const time_t *timer, const char *str)
{
    /* timestamp first ... */
    fprintf_time(fp, timer);

    /* ... then the separator, the cached string and the line end */
    fprintf(fp, " %s\n", str);
} /* write_cache_entry() */

/* Check that "interval" is a well-formed interval specification.
 *
 * Accepted forms are the literal "NEVER", or one or more decimal digits
 * followed by exactly one unit character: s, m, h, d or w.
 *
 * Returns a pointer to a static copy of the specification (overwritten
 * by the next successful call) when it is valid, NULL otherwise.  NULL,
 * the empty string, a unit with no digits, and a specification too long
 * for the static buffer are all rejected.
 */
static const char *valid_interval(const char *interval)
{
    static char buf[80];
    size_t interval_len;
    size_t i;
    char unit;

    if (interval == NULL)
      return NULL;

    interval_len = strlen(interval);
    if ((interval_len == 0) || (interval_len >= sizeof buf))
      return NULL; /* nothing to check, or it would overflow buf */

    if (strcmp(interval, "NEVER") != 0)
    {
        unit = interval[interval_len-1];
        if ( ! ((unit == 's') || (unit == 'm') || (unit == 'h') ||
                (unit == 'd') || (unit == 'w')) )
          return NULL;

        if (interval_len == 1)
          return NULL; /* a unit with no number in front of it */

        /* everything before the unit must be a digit */
        for (i = 0; i < interval_len-1; i++)
          if (!isdigit((unsigned char)interval[i]))
            return NULL;
    }

    (void) strcpy(buf, interval); /* length was checked above */
    return buf;
} /* valid_interval */


/* Add "interval_str" to the time *timer and return the new time.
 *
 * "interval_str" is an integer followed by a single character: s, m,
 * h, d, w; representing units of seconds, minutes, hours, days, and
 * weeks respectively.  Any other trailing character is treated like
 * 'w'.  The literal "NEVER" means the entry never expires.
 *
 * The interval is added to the matching member of the local broken-down
 * time and mktime() normalises the result.
 *
 * Returns (time_t)-1 ("never expires") for "NEVER", for a NULL or
 * empty specification, when memory cannot be allocated, when *timer
 * cannot be converted by localtime(), or when the interval is too
 * large to add to an int-sized struct tm member.  mktime() itself also
 * returns (time_t)-1 when the resulting time is not representable.
 */
static time_t make_interval(const char *interval_str, const time_t *timer)
{
    const struct tm *now;
    struct tm newtime;
    size_t interval_len;
    char unit;
    char *digits;
    long interval;
    long scale = 1;
    int *field;

    if (interval_str == NULL)
      return (time_t)-1;

    if (strcmp(interval_str, "NEVER") == 0)
      return (time_t)-1;

    interval_len = strlen(interval_str);
    if (interval_len == 0)
      return (time_t)-1;

    /* The last character is the unit; everything before it is the
       number.  strtol() saturates on overflow, which the range check
       below then rejects. */
    if ((digits = malloc(interval_len)) == NULL)
      return (time_t)-1;
    memcpy(digits, interval_str, interval_len-1);
    digits[interval_len-1] = '\0';
    interval = strtol(digits, NULL, 10);
    free(digits);
    unit = interval_str[interval_len-1];

    now = localtime(timer);
    if (now == NULL)
      return (time_t)-1;

    newtime = *now;

    switch (unit)
    {
      case 's': field = &newtime.tm_sec;  break;
      case 'm': field = &newtime.tm_min;  break;
      case 'h': field = &newtime.tm_hour; break;
      case 'd': field = &newtime.tm_mday; break;
      case 'w':
      default:  field = &newtime.tm_mday; scale = 7; break;
    }

    /* The struct tm members are ints: refuse anything that would not
       fit rather than letting the value wrap.  The members filled in
       by localtime() are non-negative, so these bounds are safe. */
    if ((interval > ((long)INT_MAX - *field) / scale) ||
        (interval < (long)INT_MIN / scale))
      return (time_t)-1;

    *field += (int)(interval * scale);

    return(mktime(&newtime));
} /* make_interval() */

/* Print the command-line synopsis for "progname" on stderr and
   terminate the program with EXIT_FAILURE.  Does not return. */
static void do_usage (const char *progname)
{
    FILE *out = stderr;

    fprintf(out, "usage: %s [-tN[smhdw]] cache-file [string-to-cache]\n", progname);

    exit(EXIT_FAILURE);
} /* do_usage() */

/* expired [-tN[smhdw]] cache-file [string-to-cache]
 *
 * Looks the string up in the cache file and (re)writes its entry with
 * a fresh expiry time of "now + interval" (default interval: one week).
 * When no string is given on the command line, the first line of stdin
 * is used.  The rest of stdin is always read and discarded.
 *
 * Exit status:
 *   ACTIVE   (EXIT_SUCCESS)  the string was in the cache and its entry
 *                            had not expired
 *   INACTIVE (EXIT_FAILURE)  the string was not in the cache, or its
 *                            entry had expired; also used for errors
 *
 * Cache lines have the form "<14-digit stamp> <string>".  A matching
 * entry is overwritten in place, which is only safe because the new
 * line has exactly the same length as the old one; lines whose stamp is
 * not 14 characters starting in column 0 are therefore never treated as
 * a match.  Entries that are not found are appended.
 */
int main(int argc, char *argv[])
{
    /* A key is limited so that stamp + space + key always fits in one
       line buffer when the file is read back. */
    enum { CACHE_LINE_SIZE = 2048, CACHE_KEY_MAX = CACHE_LINE_SIZE - 16 };
    const char *cache_name = NULL;
    const char *arg_str = NULL;       /* key from argv, NULL => read stdin */
    const char *interval_str = "1w";
    const char *checked;
    time_t interval, now;
    FILE *cache_fp;
    fpos_t cache_fpos;
    char buf[CACHE_LINE_SIZE];
    char keybuf[CACHE_LINE_SIZE];
    int active=INACTIVE;

    if ((argc < 2) || (argc > 4))
      do_usage(argv[0]);

    if ((argc >= 3) &&
        (strncmp(argv[1], "-t", 2) == 0) && (argv[1][2] != '\0') &&
        ((checked = valid_interval(&(argv[1][2]))) != NULL))
    {
        interval_str = checked;
        cache_name = argv[2];
        if (argc == 4)
          arg_str = argv[3];
    }
    else if (argc == 3)
    {
        cache_name = argv[1];
        arg_str = argv[2];
    }
    else if (argc == 2)
    {
        cache_name = argv[1];
    }
    else
      do_usage(argv[0]);

    /* Fetch the key into keybuf, bounded and always terminated. */
    if (arg_str != NULL)
      (void) strncpy(keybuf, arg_str, CACHE_KEY_MAX);
    else if (fgets(keybuf, sizeof keybuf, stdin) == NULL)
      keybuf[0] = '\0';                 /* empty stdin: empty key */
    keybuf[CACHE_KEY_MAX] = '\0';
    keybuf[strcspn(keybuf, "\n")] = '\0'; /* a key is a single line */

    if (!feof(stdin))
      while (getchar() != EOF)
      {
        /* consume stdin; "procmail" gets upset otherwise */
      }

    (void) time(&now);
    interval = make_interval(interval_str, &now);

    /* An empty key can never be matched when the file is read back, so
       storing it would only grow the cache. */
    if (keybuf[0] == '\0')
      return INACTIVE;

    if ((cache_fp = fopen(cache_name, "r")) == NULL)
    {
        if ((cache_fp = fopen(cache_name, "w")) == NULL)
        {
            fprintf(stderr, "%s: can not open %s for writing\n",
                    argv[0], cache_name);
            exit(EXIT_FAILURE);
        }

        /* new cache file */
        write_cache_entry(cache_fp, &interval, keybuf);
        if (fclose(cache_fp) != 0)
          fprintf(stderr, "%s: error writing %s\n", argv[0], cache_name);
        return INACTIVE;
    }
    fclose(cache_fp); /* wrong mode */

    if ((cache_fp = fopen(cache_name, "r+")) == NULL)
    {
        fprintf(stderr, "%s: can not open %s for update\n",
                argv[0], cache_name);
        exit(EXIT_FAILURE);
    }

    for (;;)
    {
        const char *date;
        const char *str;
        time_t expire;

        /* Remember where the next line starts: either the entry to
           overwrite or, once the file is exhausted, the append point. */
        if (fgetpos(cache_fp, &cache_fpos) != 0)
        {
            fprintf(stderr, "%s: can not get position in %s\n",
                    argv[0], cache_name);
            fclose(cache_fp);
            exit(EXIT_FAILURE);
        }

        if (getline(cache_fp, buf) == EOF)
          break; /* not found: append */

        if ((date = strtok(buf, " ")) == NULL)
          continue;
        if ((str = strtok(NULL, "")) == NULL)
          continue;
        if ((date != buf) || (strlen(date) != 14))
          continue; /* overwriting in place would corrupt the file */
        if (strcmp(keybuf, str) != 0)
          continue;

        expire = strtotime(date);
        if ((expire != (time_t)-1) && (difftime(expire, now) <= 0.0))
          active=INACTIVE;
        else
          active=ACTIVE;
        break; /* found: overwrite this line */
    }

    /* A positioning call is required between reading and writing. */
    if (fsetpos(cache_fp, &cache_fpos) != 0)
    {
        fprintf(stderr, "%s: can not set position in %s\n",
                argv[0], cache_name);
        fclose(cache_fp);
        exit(EXIT_FAILURE);
    }
    write_cache_entry(cache_fp, &interval, keybuf);
    if (fclose(cache_fp) != 0)
      fprintf(stderr, "%s: error writing %s\n", argv[0], cache_name);
    return active;

} /* main() */