nmh-workers
[Top] [All Lists]

[Nmh-workers] updated: parse RFC-2822 quoted-string and quoted-pair

2003-12-19 09:30:04
In light of the discussion on this list a few days ago, I have changed
my RFC-2822-unquoting code to more strictly follow the RFC.  There are
two sorts of quoting that must be removed: quoted-pair and quoted-string.

* quoted-pair: An ASCII character x preceded by a \ is 'quoted'; the
string \x unquotes to the single character x.  The RFC allows a NUL
(character 0) in a quoted-pair (see obs-qp), but my code does not:
because nmh is implemented using C strings, a NUL in a quoted-pair
would end the string, so there's no point in trying to unquote NULs.

* quoted-string: Remove every " character that is not itself preceded
by a \.  According to the RFC, these must always be balanced and cannot
nest, so it is safe to just remove them.  Note that a double quote "
preceded by a \ is not removed, but is interpreted as a literal double
quote character; this is just a special case of quoted-pair.

* Otherwise, make no changes to the string.

The code follows.  

-- Carl

Index: h/fmt_compile.h
===================================================================
RCS file: /cvs-latest/nmh/nmh/h/fmt_compile.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 fmt_compile.h
--- h/fmt_compile.h     30 Apr 1999 18:08:34 -0000      1.1.1.1
+++ h/fmt_compile.h     19 Dec 2003 14:58:15 -0000
@@ -70,40 +70,41 @@
 #define FT_LS_FRIENDLY 58      /* set "str" to "friendly" format addr */
 #define FT_LV_HOSTTYPE 59      /* set "value" to addr host type       */
 #define FT_LV_INGRPF   60      /* set "value" to addr in-group flag   */
-#define FT_LV_NOHOSTF  61      /* set "value" to addr no-host flag    */
+#define FT_LS_UNQUOTE  61      /* remove RFC 2822 quotes from "str"   */
+#define FT_LV_NOHOSTF  62      /* set "value" to addr no-host flag */
 
 /* Date Coercion */
-#define FT_LOCALDATE   62      /* Coerce date to local timezone */
-#define FT_GMTDATE     63      /* Coerce date to gmt            */
+#define FT_LOCALDATE   63      /* Coerce date to local timezone */
+#define FT_GMTDATE     64      /* Coerce date to gmt            */
 
 /* pre-format processing */
-#define FT_PARSEDATE   64      /* parse comp into a date (tws) struct */
-#define FT_PARSEADDR   65      /* parse comp into a mailaddr struct   */
-#define FT_FORMATADDR  66      /* let external routine format addr    */
-#define FT_MYMBOX      67      /* do "mymbox" test on comp            */
+#define FT_PARSEDATE   65      /* parse comp into a date (tws) struct */
+#define FT_PARSEADDR   66      /* parse comp into a mailaddr struct   */
+#define FT_FORMATADDR  67      /* let external routine format addr    */
+#define FT_MYMBOX      68      /* do "mymbox" test on comp            */
 
 /* misc. */            /* ADDTOSEQ only works if you include "options LBL" */
-#define FT_ADDTOSEQ    68      /* add current msg to a sequence       */
+#define FT_ADDTOSEQ    69      /* add current msg to a sequence       */
 
 /* conditionals & control flow (must be last) */
-#define FT_SAVESTR     69      /* save current str reg               */
-#define FT_DONE                70      /* stop formatting                    */
-#define FT_PAUSE       71      /* pause                              */
-#define FT_NOP         72      /* nop                                */
-#define FT_GOTO                73      /* (relative) goto                    */
-#define FT_IF_S_NULL   74      /* test if "str" null                 */
-#define FT_IF_S                75      /* test if "str" non-null             */
-#define FT_IF_V_EQ     76      /* test if "value" = literal          */
-#define FT_IF_V_NE     77      /* test if "value" != literal         */
-#define FT_IF_V_GT     78      /* test if "value" > literal          */
-#define FT_IF_MATCH    79      /* test if "str" contains literal     */
-#define FT_IF_AMATCH   80      /* test if "str" starts with literal  */
-#define FT_S_NULL      81      /* V = 1 if "str" null                */
-#define FT_S_NONNULL   82      /* V = 1 if "str" non-null            */
-#define FT_V_EQ                83      /* V = 1 if "value" = literal         */
-#define FT_V_NE                84      /* V = 1 if "value" != literal        */
-#define FT_V_GT                85      /* V = 1 if "value" > literal         */
-#define FT_V_MATCH     86      /* V = 1 if "str" contains literal    */
-#define FT_V_AMATCH    87      /* V = 1 if "str" starts with literal */
+#define FT_SAVESTR     70      /* save current str reg               */
+#define FT_DONE                71      /* stop formatting                    */
+#define FT_PAUSE       72      /* pause                              */
+#define FT_NOP         73      /* nop                                */
+#define FT_GOTO                74      /* (relative) goto                    */
+#define FT_IF_S_NULL   75      /* test if "str" null                 */
+#define FT_IF_S                76      /* test if "str" non-null             */
+#define FT_IF_V_EQ     77      /* test if "value" = literal          */
+#define FT_IF_V_NE     78      /* test if "value" != literal         */
+#define FT_IF_V_GT     79      /* test if "value" > literal          */
+#define FT_IF_MATCH    80      /* test if "str" contains literal     */
+#define FT_IF_AMATCH   81      /* test if "str" starts with literal  */
+#define FT_S_NULL      82      /* V = 1 if "str" null                */
+#define FT_S_NONNULL   83      /* V = 1 if "str" non-null            */
+#define FT_V_EQ                84      /* V = 1 if "value" = literal         */
+#define FT_V_NE                85      /* V = 1 if "value" != literal        */
+#define FT_V_GT                86      /* V = 1 if "value" > literal         */
+#define FT_V_MATCH     87      /* V = 1 if "str" contains literal    */
+#define FT_V_AMATCH    88      /* V = 1 if "str" starts with literal */
 
 #define IF_FUNCS FT_S_NULL     /* start of "if" functions */
Index: man/mh-format.man
===================================================================
RCS file: /cvs-latest/nmh/nmh/man/mh-format.man,v
retrieving revision 1.7
diff -u -r1.7 mh-format.man
--- man/mh-format.man   21 Sep 2003 17:09:31 -0000      1.7
+++ man/mh-format.man   19 Dec 2003 14:58:18 -0000
@@ -282,6 +282,7 @@
 .\" decodecomp comp    string  Set \fIstr\fR to RFC-2047 decoded component text
 decode expr    string  decode \fIstr\fR as RFC-2047 (MIME-encoded) 
                        component
+unquote        expr    string  remove RFC-2822 quotes from \fIstr\fR
 trim   expr            trim trailing white-space from \fIstr\fR
 putstr expr            print \fIstr\fR
 putstrf        expr            print \fIstr\fR in a fixed width
Index: sbr/fmt_compile.c
===================================================================
RCS file: /cvs-latest/nmh/nmh/sbr/fmt_compile.c,v
retrieving revision 1.6
diff -u -r1.6 fmt_compile.c
--- sbr/fmt_compile.c   11 Aug 2003 01:20:52 -0000      1.6
+++ sbr/fmt_compile.c   19 Dec 2003 14:58:18 -0000
@@ -149,6 +149,8 @@
      { "mymbox",     TF_COMP,  FT_LV_COMPFLAG, FT_MYMBOX,      TFL_PUTN },
      { "addtoseq",   TF_STR,   FT_ADDTOSEQ,    0,              0 },
 
+     { "unquote",   TF_EXPR,   FT_LS_UNQUOTE,  0,              TFL_PUTS},
+
      { NULL,         0,                0,              0,              0 }
 };
 
Index: sbr/fmt_scan.c
===================================================================
RCS file: /cvs-latest/nmh/nmh/sbr/fmt_scan.c,v
retrieving revision 1.13
diff -u -r1.13 fmt_scan.c
--- sbr/fmt_scan.c      30 Sep 2003 19:55:12 -0000      1.13
+++ sbr/fmt_scan.c      19 Dec 2003 14:58:20 -0000
@@ -682,6 +682,38 @@
                  }
                }
            }
+           break;  
+
+
+               /* UNQUOTEs RFC-2822 quoted-string and quoted-pair */
+       case FT_LS_UNQUOTE:
+           if (str) {          
+               int m;
+               strncpy(buffer, str, sizeof(buffer));
+               str = buffer;
+       
+               /* we will parse from buffer to buffer2 */
+               n = 0; /* n is the input position in str */
+               m = 0; /* m is the output position in buffer2 */
+
+               while ( str[n] != '\0') {
+                   switch ( str[n] ) {
+                       case '\\':
+                           n++;
+                           if ( str[n] != '\0') 
+                               buffer2[m++] = str[n++];
+                           break;
+                       case '"':
+                           n++;
+                           break;
+                       default:
+                           buffer2[m++] = str[n++];
+                           break;
+                       }                
+               }
+               buffer2[m] = '\0';
+               str = buffer2;
+            }
            break;
 
        case FT_LOCALDATE:



_______________________________________________
Nmh-workers mailing list
Nmh-workers(_at_)nongnu(_dot_)org
http://mail.nongnu.org/mailman/listinfo/nmh-workers

<Prev in Thread] Current Thread [Next in Thread>