procmail
[Top] [All Lists]

Decoding MIME Update

2000-03-24 09:05:36
Hello Jari, Hello All,

I have been working with both pm-jamime-kill.rc and
pm-jamime-decode.rc.

First, concerning pm-jamime-kill.rc: it can kill valid included HTML
files if they meet its initial conditions.  So beware.  I have been
looking into other aspects, such as the number of lines in the text and
HTML sections.  For equivalent MIME sections I have found these line
counts:

Message         text/plain      text/html
        1               5               27
        2              26               46
        3              93              122
        4*            209             2194

(*) 4 had a whole bunch of Microsoft Word headers under the html
part.

My conclusion is that the text/plain and text/html sections should be
compared.  Procmail may not be an adequate tool for this.
I have not had enough time to really go through the Perl MIME stuff,
which looks like it should be able to handle this.  As an aside, in
order to compare the two sections, something like striphtml is good
for making the html section readable.

Concerning pm-jamime-decode.rc, I have expanded it so that, in addition
to simple MIME-encoded text, it also decodes From: and Subject: headers
that have MIME encodings.

I also handle the simple case where a message with a multipart/mixed
MIME header contains only a single MIME-encoded text section.

I offer it here for testing by the procmail community.  Comments are
appreciated.

A nice weekend to all,

        --Ralph

Dr. Ralph P. Sobek                Disclaimer: The above ruminations are my own.
Ralph(_dot_)Sobek(_at_)irit(_dot_)fr            
sobek(_at_)irit(_dot_)fr                                      
http://beeline.to/genealogy/
Ph:(+33)[0]561558618   FAX:(+33)[0]561556258   http://www.irit.fr/~Ralph.Sobek/
===============================================================================
Estimates are that one-third to two-thirds of animal and plant species will 
disappear in the foreseeable future!  AWFUL!

# @(#) pm-jamime-decode.rc -- decode MIME body contents; quoted-printable, base64
# @(#) $Id: pm-jamime-decode.rc,v 1.2 1998/03/30 09:49:09 jaalto Exp $
#
#   File id
#
#       .Copyright (C)  1998 Jari Aalto
#       .$Maintainer:   Jari Aalto <jari(_dot_)aalto(_at_)poboxes(_dot_)com> $
#       .$Created:      1998-01 $
#       .$Keywords:     procmail recipe $
#
#       This code is free software in terms of GNU Gen. pub. Lic. v2 or later
#       You can get newest version by sending email to maintainer with
#       subject "send <FILENAME>"
#
#   Documentation
#
#       The original father of the decoding scheme used here was
#       presented by Peter Galbraith
#       <galbraith(_at_)mixing(_dot_)qc(_dot_)dfo(_dot_)ca> in the
#       procmail mailing list somewhere at the end of 1997.
#
#       This includerc supposes that the header has MIME header
#       Content-Type: text/plain and performs qp or base64 decoding
#       on the whole message. Note that if you receive messages that
#       have many MIME attachments, then this recipe is not suitable
#       for them.
#
#       Procmail is not designed to handle MIME attachments and this
#       recipe only applies to the whole _body_.
#
#   Required settings
#
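#       PMSRC must point to the source directory of the procmail code.
#       This subroutine will include
#
#       o   pm-jamime.rc (when MIME_VER does not contain a digit yet)
#       o   pm-javar.rc (this include is currently commented out below)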
#
#   Call arguments (variables to set before calling)
#
#       (none)
#
#   Return values
#
#       (none)
#
#   Examples
#
#       Instead of testing the existence of text/plain in the header,
#       you can force decoding by setting JA_MIME_DECODE_REGEXP to
#       ".*".
#
#           RC_MIME_DECODE = $PMSRC/pm-jamime-decode.rc
#
#           :0
#           * condition
#           { JA_MIME_DECODE_REGEXP = ".*" }
#
#           INCLUDERC = $RC_MIME_DECODE     # call subroutine.
#
#
#   Change Log
#
#       00/03/24 - RpS: Adapted from Jari Aalto's pm-jamime-decode.rc;
#       handles encoded Subject and From headers, as well as multipart
#       messages with only ONE part.

# .................................................... &initialising ...

#   Trace marker; "dummy" is assigned here but never read in this file.
dummy = "init: pm-jamime-decode.rc Reading variable definitions if needed"

#   Shell used for the commands that procmail hands over to a shell.
SHELL = /bin/sh

#   Disabled: conditional include of the shared variable definitions.
# :0
# * ! WSPC ?? ( )
# { INCLUDERC = $PMSRC/pm-javar.rc }

#   Include pm-jamime.rc unless MIME_VER already contains a digit.
#   NOTE(review): pm-jamime.rc is presumably what defines MIME_VER and
#   the other MIME_* variables used further down -- confirm.
:0
* ! MIME_VER ?? [0-9]
{ INCLUDERC = $PMSRC/pm-jamime.rc }

# .......................................................... &public ...
# User configurable sections

#   Regexp that gates the body decoding further down. A value already
#   set by the caller is kept; otherwise the default accepts a
#   Content-Type of text/plain or text/enriched.
JA_MIME_DECODE_REGEXP = ${JA_MIME_DECODE_REGEXP:-"^Content-Type: *text/(plain|enriched)"}

# ........................................................... &do-it ...
#   Run conversion if it was quoted printable.
#   Also reflect correct MIME header

dummy = "pm-jamime-decode.rc: handle quoted printable"

#   Subject: header carrying an RFC 2047 "Q" encoded-word in an
#   iso-8859-N charset, e.g.  =?iso-8859-1?Q?J=E4rvi?=
#
#   "?" is a regexp operator in procmail, so every literal question
#   mark is escaped. Unescaped, "=?iso" means "optional =, then iso",
#   and the condition would fire on any Subject that merely mentions
#   iso-8859. Only the Q encoding is accepted, because the sed script
#   and $MIME_BIN_QP below cannot undo the base64 ("B") form.
#
#   Sets: SUBJECT        text following "Subject:" as matched
#         clean_Subject  the same text with the encoded-words decoded
#         SUBJECT_DO_IT  "yes"; read by the header rewrite recipes

:0
* ^Subject:\/.*=\?iso-8859-[0-9]+\?Q\?.*\?=.*
{
        SUBJECT = $MATCH

        #   sed strips the =?charset?Q? ... ?= wrapper from every
        #   encoded-word (procmail matches case-insensitively, sed does
        #   not, hence the bracket pairs); $MIME_BIN_QP then decodes the
        #   =XX escapes. Nothing is written to /tmp: a fixed-name
        #   scratch file there would leak the header and invite
        #   symlink attacks.
        clean_Subject = `echo "$SUBJECT" |                            \
                sed -e 's/=?[Ii][Ss][Oo]-8859-[0-9][0-9]*?[Qq]?\([^?]*\)?=/\1/g' | \
                $MIME_BIN_QP `
        SUBJECT_DO_IT = "yes"
}

#   From: header carrying an RFC 2047 "Q" encoded-word in an
#   iso-8859-N charset, e.g.  =?iso-8859-1?Q?J=F6rg?= <user@example.org>
#
#   "?" is a regexp operator in procmail, so every literal question
#   mark is escaped. Unescaped, "=?iso" means "optional =, then iso",
#   and the condition would fire on any From: that merely mentions
#   iso-8859. Only the Q encoding is accepted, because the sed script
#   and $MIME_BIN_QP below cannot undo the base64 ("B") form.
#
#   Sets: FROM        text following "From:" as matched
#         clean_From  the same text with the encoded-words decoded
#         FROM_DO_IT  "yes"; read by the header rewrite recipes

:0
* ^From:\/.*=\?iso-8859-[0-9]+\?Q\?.*\?=.*
{
        FROM = $MATCH

        #   sed strips the =?charset?Q? ... ?= wrapper from every
        #   encoded-word (procmail matches case-insensitively, sed does
        #   not, hence the bracket pairs); $MIME_BIN_QP then decodes the
        #   =XX escapes. Nothing is written to /tmp: a fixed-name
        #   scratch file there would leak the header and invite
        #   symlink attacks.
        clean_From = `echo "$FROM" |                                  \
                sed -e 's/=?[Ii][Ss][Oo]-8859-[0-9][0-9]*?[Qq]?\([^?]*\)?=/\1/g' | \
                $MIME_BIN_QP `
        FROM_DO_IT = "yes"
}

#   Prevent calling sh -c here. This speeds up procmail
#
#   The current SHELLMETAS value is saved first so that it can be put
#   back at the end of this file. The bare variable name on the line
#   after the assignment then unsets SHELLMETAS. The recipes that
#   follow carry characters such as ";", "$" and ">" in their command
#   lines; with SHELLMETAS unset, procmail starts those programs
#   directly instead of handing the line to $SHELL.
#
jaMimeDecodeShellmetas = $SHELLMETAS
SHELLMETAS

#   Write the decoded header fields back with formail. The three
#   recipes are chained with flag E (else-if), so at most one runs:
#
#       Subject and From decoded  -> insert both fields
#       only Subject decoded      -> insert Subject:
#       only From decoded         -> insert From:
#
#   Each run also stamps X-Mime-Header-Decoded with $MIME_TYPE.
#   Flags: f = filter, h = header only, w = wait for the exit code.

:0 fhw
* SUBJECT_DO_IT ?? ^^yes^^
* FROM_DO_IT ?? ^^yes^^
| $FORMAIL \
    -i "Subject:$clean_Subject" \
    -i "From:$clean_From" \
    -I "X-Mime-Header-Decoded: $MIME_TYPE"

:0E fhw
* SUBJECT_DO_IT ?? ^^yes^^
| $FORMAIL \
    -i "Subject: $clean_Subject" \
    -I "X-Mime-Header-Decoded: $MIME_TYPE"

:0E fhw
* FROM_DO_IT ?? ^^yes^^
| $FORMAIL \
    -i "From: $clean_From" \
    -I "X-Mime-Header-Decoded: $MIME_TYPE"


#   Body filter for a multipart message with a single MIME section:
#   runs only when MIME_BOUNDARY_COUNT is exactly 2.
#   NOTE(review): presumably 2 means "one opening plus one closing
#   boundary line" -- confirm against the file that sets the variable.
#
#   The awk script removes from the body
#       - every line that contains the boundary string,
#       - every line starting with "Content-",
#       - one following line when such a Content- line ends in ";"
#         (a single folded continuation line),
#   and prints everything else unchanged.
#
#   The boundary is located with index(), i.e. as a literal string.
#   MIME boundaries may contain characters such as "+", "(", ")" and
#   "?" that are regexp operators, so a regexp match could miss the
#   boundary lines or match unrelated ones. An empty BOUNDARY is
#   ignored explicitly; as a regexp it would match every line and
#   empty the body.
#
#   Flags: f = filter, b = body only, w = wait for the exit code,
#          i = ignore write errors.

:0 fbw i
* MIME_BOUNDARY_COUNT ?? ^^2^^
| $AWK                                                              \
  ' BEGIN { skip = 0 }                                              \
    {                                                               \
        if (skip > 0) { skip = 0; next }                            \
        if (BOUNDARY != "" && index($0, BOUNDARY) > 0) { next }     \
        if (match($0, "^Content-") > 0) {                           \
                if (match($0, ";$") > 0) { skip = 1 }               \
                next                                                \
        }                                                           \
        print;                                                      \
    }                                                               \
  ' BOUNDARY="$MIME_BOUNDARY"

#   Chained with flag a: runs only when the preceding recipe matched
#   and completed successfully. The body is piped through the
#   quoted-printable decoder.
#   NOTE(review): the section's own Content-Transfer-Encoding is not
#   examined here, so a base64 or unencoded section is fed to the
#   quoted-printable decoder as well -- confirm this is intended.
:0 afbw
| $MIME_BIN_QP

#   Still chained with flag a: relabel the message header as a plain
#   text message and record the original type in X-Mime-Type-Decoded.
:0 afhw
| $FORMAIL \
    -I "Content-Type: text/plain" \
    -I "Mime-Version: $MIME_VER" \
    -I "X-Mime-Type-Decoded: $MIME_TYPE"

#   Decode the body when the header matches $JA_MIME_DECODE_REGEXP
#   (flag $ makes procmail expand the variable inside the condition).
#
#   Inside the block, each decoder recipe is followed by a recipe that
#   relabels Content-Transfer-Encoding as 8bit. That follow-up recipe
#   uses flag "a" rather than "A": "A" only requires the decoder
#   recipe's conditions to have matched, whereas "a" also requires the
#   decoder to have finished successfully. With "A", a failing or
#   missing decoder left the body encoded while the header already
#   claimed 8bit.
#
#   Flags: f = filter, b = body only, h = header only,
#          w = wait for the exit code.

:0
*$ $JA_MIME_DECODE_REGEXP
{
    #   quoted-printable body
    :0 fbw
    * ^Content-Transfer-Encoding: *quoted-printable
    | $MIME_BIN_QP

        :0 afhw
        | $FORMAIL -I "Content-Transfer-Encoding: 8bit"

    #   base64 body
    :0 fbw
    * ^Content-Transfer-Encoding: *base64
    | $MIME_BIN_64

        :0 afhw
        | $FORMAIL -I "Content-Transfer-Encoding: 8bit"
}

# $MIME_BIN_QP does not handle separate MIME sections

#:0 EB
#*$ $JA_MIME_DECODE_REGEXP
#{
#    :0 fbw
#    * MIME_B_QP ?? yes
#   * B ?? ^Content-Transfer-Encoding: *quoted-printable
#    | sed -e "'""\\|$JA_MIME_DECODE_REGEXP|,\\|$MIME_BOUNDARY|p""'"

#       :0 Afbw
#       | $MIME_BIN_QP

#        :0 Afbw
#        | $FORMAIL -I "Content-Transfer-Encoding: 8bit"

#    :0 fbw
#    * B ?? ^Content-Transfer-Encoding: *base64
#    | $MIME_BIN_64

#        :0 Afbw
#        | $FORMAIL -I "Content-Transfer-Encoding: 8bit"
#}

#   Put back the SHELLMETAS value that was saved in
#   jaMimeDecodeShellmetas before the filter recipes ran.
SHELLMETAS = $jaMimeDecodeShellmetas
dummy = "subroutine: pm-jamime-decode.rc end."

#   Debugging aid, disabled. The recipe below delivers every message
#   to a folder named "test" (with a local lockfile) and thereby ends
#   procmail's processing of that message. This file is meant to be
#   called through INCLUDERC as a subroutine, so it has to hand the
#   decoded message back to the calling rc file instead of delivering
#   it. Enable only while testing this file in isolation.
#
#:0:
#test

# End of file pm-jamime-decode.rc

#:0:
#/dev/null
<Prev in Thread] Current Thread [Next in Thread>
  • Decoding MIME Update, Ralph SOBEK <=