  * Add two new dpkg options --path-exclude and --path-include for filtering
    files on package installation. This allows embedded systems to skip
    /usr/share/doc, manpages, etc. (Patch now also landed in upstream git
    head).
  * src/Makefile.in: Re-run automake to pick up changes from patch above.

--- man/dpkg.1	2010-02-14 00:29:19 +0000
+++ man/dpkg.1	2010-06-11 14:51:51 +0000
@@ -519,6 +519,41 @@
 current dpkg action. Note: front-ends might call dpkg several times per
 invocation, which might run the hooks more times than expected.
 .RE
+.P
+.BI \-\-path\-exclude= glob-pattern
+.br
+.BI \-\-path\-include= glob-pattern
+.RS
+Set \fIglob-pattern\fP as a path filter, either by excluding or re-including
+previously excluded paths matching the specified patterns during install.
+
+\fIWarning: take into account that depending on the excluded paths you
+might completely break your system; use with caution.\fP
+
+The glob patterns use the same wildcards used in the shell, where '*' matches
+any sequence of characters, including the empty string and also '/'. For
+example, \fI'/usr/*/READ*'\fP matches \fI'/usr/share/doc/package/README'\fP.
+As usual, '?' matches any single character (again, including '/'). And '['
+starts a character class, which can contain a list of characters, ranges
+and complementations. See \fBglob\fP(7) for detailed information about
+globbing. Note: the current implementation might re-include more directories
+and symlinks than needed, to be on the safe side and avoid possible unpack
+failures; future work might fix this.
+
+This can be used to remove all paths except some particular ones; a typical
+case is:
+
+.nf
+.B \-\-path\-exclude=/usr/share/doc/*
+.B \-\-path\-include=/usr/share/doc/*/copyright
+.fi
+
+to remove all documentation files except the copyright files.
+
+These two options can be specified multiple times, and interleaved with
+each other. Both are processed in the given order, with the last rule that
+matches a file name making the decision.
+.RE
 .TP
 \fB\-\-status\-fd \fR\fIn\fR
 Send machine-readable package status and progress information to file

=== modified file 'src/Makefile.am'
--- src/Makefile.am	2010-02-14 00:29:19 +0000
+++ src/Makefile.am	2010-06-11 14:51:51 +0000
@@ -25,6 +25,7 @@
 	enquiry.c \
 	errors.c \
 	filesdb.c filesdb.h \
+	filters.c filters.h \
 	divertdb.c \
 	statdb.c \
 	help.c \

=== modified file 'src/Makefile.in'
--- src/Makefile.in	2010-03-07 23:38:51 +0000
+++ src/Makefile.in	2010-06-11 14:51:51 +0000
@@ -58,11 +58,11 @@
 PROGRAMS = $(bin_PROGRAMS)
 am_dpkg_OBJECTS = archives.$(OBJEXT) cleanup.$(OBJEXT) \
 	configure.$(OBJEXT) depcon.$(OBJEXT) enquiry.$(OBJEXT) \
-	errors.$(OBJEXT) filesdb.$(OBJEXT) divertdb.$(OBJEXT) \
-	statdb.$(OBJEXT) help.$(OBJEXT) main.$(OBJEXT) \
-	packages.$(OBJEXT) pkg-show.$(OBJEXT) processarc.$(OBJEXT) \
-	remove.$(OBJEXT) select.$(OBJEXT) trigproc.$(OBJEXT) \
-	update.$(OBJEXT)
+	errors.$(OBJEXT) filesdb.$(OBJEXT) filters.$(OBJEXT) \
+	divertdb.$(OBJEXT) statdb.$(OBJEXT) help.$(OBJEXT) \
+	main.$(OBJEXT) packages.$(OBJEXT) pkg-show.$(OBJEXT) \
+	processarc.$(OBJEXT) remove.$(OBJEXT) select.$(OBJEXT) \
+	trigproc.$(OBJEXT) update.$(OBJEXT)
 dpkg_OBJECTS = $(am_dpkg_OBJECTS)
 am__DEPENDENCIES_1 =
 dpkg_DEPENDENCIES = ../lib/dpkg/libdpkg.a ../lib/compat/libcompat.a \
@@ -256,6 +256,7 @@
 	enquiry.c \
 	errors.c \
 	filesdb.c filesdb.h \
+	filters.c filters.h \
 	divertdb.c \
 	statdb.c \
 	help.c \
@@ -403,6 +404,7 @@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/enquiry.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/errors.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/filesdb.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/filters.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/help.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/packages.Po@am__quote@

=== modified file 'src/archives.c'
--- src/archives.c	2010-04-15 10:39:41 +0000
+++ src/archives.c	2010-06-28 08:34:38 +0000
@@ -57,6 +57,7 @@
 #include "filesdb.h"
 #include "main.h"
 #include "archives.h"
+#include "filters.h"
 
 #define MAXCONFLICTORS 20
 
@@ -569,13 +570,23 @@
       }
     }
   }
-       
-  if (existingdirectory) return 0;
+
   if (keepexisting) {
     tarfile_skip_one_forward(ti, oldnifd, nifd);
     return 0;
   }
 
+  if (filter_should_skip(ti)) {
+    nifd->namenode->flags &= ~fnnf_new_inarchive;
+    nifd->namenode->flags |= fnnf_filtered;
+    tarfile_skip_one_forward(ti, oldnifd, nifd);
+
+    return 0;
+  }
+
+  if (existingdirectory)
+    return 0;
+
   /* Now, at this stage we want to make sure neither of .dpkg-new and .dpkg-tmp
    * are hanging around.
    */

=== modified file 'src/filesdb.h'
--- src/filesdb.h	2010-04-14 09:57:59 +0000
+++ src/filesdb.h	2010-06-28 08:32:47 +0000
@@ -71,6 +71,7 @@
     fnnf_placed_on_disk=      000040, /* new file has been placed on the disk */
     fnnf_deferred_fsync =     000200,
     fnnf_deferred_rename =    000400,
+    fnnf_filtered =           001000, /* path being filtered */
   } flags; /* Set to zero when a new node is created. */
   const char *oldhash; /* valid iff this namenode is in the newconffiles list */
   struct stat *filestat;

=== added file 'src/filters.c'
--- src/filters.c	1970-01-01 00:00:00 +0000
+++ src/filters.c	2010-06-11 14:51:51 +0000
@@ -0,0 +1,128 @@
+/*
+ * dpkg - main program for package management
+ * filters.c - filtering routines for excluding bits of packages
+ *
+ * Copyright © 2007, 2008 Tollef Fog Heen <tfheen@err.no>
+ * Copyright © 2008, 2010 Guillem Jover <guillem@debian.org>
+ * Copyright © 2010 Canonical Ltd.
+ *   written by Martin Pitt <martin.pitt@canonical.com>
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <compat.h>
+
+#include <fnmatch.h>
+
+#include <dpkg/i18n.h>
+#include <dpkg/dpkg.h>
+#include <dpkg/dpkg-db.h>
+
+#include "main.h"
+#include "filesdb.h"
+#include "filters.h"
+
+struct filter_node {
+	struct filter_node *next;	/* following rule, NULL at the end */
+	char *pattern;			/* glob, stored via m_strdup() */
+	bool include;			/* true: include rule; false: exclude */
+};
+
+static struct filter_node *filter_head = NULL;	/* first rule, if any */
+static struct filter_node **filter_tail = &filter_head; /* append slot */
+
+/* Append an include (true) or exclude (false) glob rule to the filters. */
+void
+filter_add(const char *pattern, bool include)
+{
+	struct filter_node *filter;
+
+	debug(dbg_general, "adding %s filter for '%s'",
+	      include ? "include" : "exclude", pattern);
+	/* Build the node and link it at the tail, preserving rule order. */
+	filter = m_malloc(sizeof(*filter));
+	filter->pattern = m_strdup(pattern);
+	filter->include = include;
+	filter->next = NULL;
+
+	*filter_tail = filter;
+	filter_tail = &filter->next;
+}
+
+/* Tell whether the archive entry ti must be skipped due to the path filters.
+ * Rules are tried in the order they were added and the last match decides;
+ * excluded directories and symlinks are kept if an include rule might need
+ * them. The name is matched from Name[1] on (NOTE(review): presumably to
+ * drop the '.' of a './path' entry name; confirm). Returns true to skip. */
+bool
+filter_should_skip(struct TarInfo *ti)
+{
+	struct filter_node *f;
+	bool skip = false;
+
+	if (!filter_head)
+		return false;
+
+	/* Last match wins. */
+	for (f = filter_head; f != NULL; f = f->next) {
+		debug(dbg_eachfile, "filter comparing '%s' and '%s'",
+		      &ti->Name[1], f->pattern);
+
+		if (fnmatch(f->pattern, &ti->Name[1], 0) == 0) {
+			if (f->include) {
+				skip = false;
+				debug(dbg_eachfile, "filter including %s",
+				      ti->Name);
+			} else {
+				skip = true;
+				debug(dbg_eachfile, "filter removing %s",
+				      ti->Name);
+			}
+		}
+	}
+
+	/* We need to keep directories (or symlinks to directories) if a
+	 * glob excludes them, but a more specific include glob brings back
+	 * files; XXX the current implementation will probably include more
+	 * directories than necessary, but better err on the side of caution
+	 * than failing with “no such file or directory” (which would leave
+	 * the package in a very bad state). */
+	if (skip && (ti->Type == Directory || ti->Type == SymbolicLink)) {
+		debug(dbg_eachfile,
+		      "filter seeing if '%s' needs to be reincluded",
+		      &ti->Name[1]);
+
+		for (f = filter_head; f != NULL; f = f->next) {
+			int path_len;
+
+			if (!f->include)
+				continue;
+
+			/* Length of the literal prefix of the pattern, up to
+			 * its first wildcard character (or all of it). */
+			path_len = (int)strcspn(f->pattern, "*?[\\");
+
+			debug(dbg_eachfiledetail,
+			      "filter subpattern '%.*s'", path_len, f->pattern);
+
+			if (strncmp(&ti->Name[1], f->pattern, path_len) == 0) {
+				debug(dbg_eachfile, "filter reincluding %s",
+				      ti->Name);
+				return false;
+			}
+		}
+	}
+
+	return skip;
+}

=== added file 'src/filters.h'
--- src/filters.h	1970-01-01 00:00:00 +0000
+++ src/filters.h	2010-06-11 14:51:51 +0000
@@ -0,0 +1,37 @@
+/*
+ * dpkg - main program for package management
+ * filters.h - external definitions for filter handling
+ *
+ * Copyright © 2007, 2008 Tollef Fog Heen <tfheen@err.no>
+ * Copyright © 2008, 2010 Guillem Jover <guillem@debian.org>
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef DPKG_FILTERS_H
+#define DPKG_FILTERS_H
+
+#include <stdbool.h>
+
+#include <dpkg/macros.h>
+#include <dpkg/tarfn.h>
+
+DPKG_BEGIN_DECLS
+
+void filter_add(const char *glob, bool include); /* append a filter rule */
+bool filter_should_skip(struct TarInfo *ti); /* true if ti is filtered out */
+
+DPKG_END_DECLS
+
+#endif

=== modified file 'src/main.c'
--- src/main.c	2010-02-14 00:29:19 +0000
+++ src/main.c	2010-06-11 14:51:51 +0000
@@ -3,7 +3,9 @@
  * main.c - main program
  *
  * Copyright © 1994,1995 Ian Jackson <ian@chiark.greenend.org.uk>
- * Copyright © 2006-2009 Guillem Jover <guillem@debian.org>
+ * Copyright © 2006-2010 Guillem Jover <guillem@debian.org>
+ * Copyright © 2010 Canonical Ltd.
+ *   written by Martin Pitt <martin.pitt@canonical.com>
  *
  * This is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -49,6 +51,7 @@
 
 #include "main.h"
 #include "filesdb.h"
+#include "filters.h"
 
 static void
 printversion(const struct cmdinfo *ci, const char *value)
@@ -124,6 +127,8 @@
 "  --admindir=<directory>     Use <directory> instead of %s.\n"
 "  --root=<directory>         Install on a different root directory.\n"
 "  --instdir=<directory>      Change installation dir without changing admin dir.\n"
+"  --path-exclude=<pattern>   Do not install paths which match a shell pattern.\n"
+"  --path-include=<pattern>   Re-include a pattern after a previous exclusion.\n"
 "  -O|--selected-only         Skip packages not selected for install/upgrade.\n"
 "  -E|--skip-same-version     Skip packages whose same version is installed.\n"
 "  -G|--refuse-downgrade      Skip packages with earlier version than installed.\n"
@@ -258,6 +263,12 @@
   if (value == endp || *endp) badusage(_("--debug requires an octal argument"));
 }
 
+static void
+setfilter(const struct cmdinfo *cip, const char *value)
+{
+  filter_add(value, cip->arg); /* cip->arg becomes the include flag */
+}
+
 static void setroot(const struct cmdinfo *cip, const char *value) {
   char *p;
   instdir= value;
@@ -494,6 +505,8 @@
   
   { "pre-invoke",        0,   1, NULL,          NULL,      set_invoke_hook, 0, &pre_invoke_hooks_tail },
   { "post-invoke",       0,   1, NULL,          NULL,      set_invoke_hook, 0, &post_invoke_hooks_tail },
+  { "path-exclude",      0,   1, NULL,          NULL,      setfilter,     0 },
+  { "path-include",      0,   1, NULL,          NULL,      setfilter,     1 },
   { "status-fd",         0,   1, NULL,          NULL,      setpipe, 0, &status_pipes },
   { "log",               0,   1, NULL,          &log_file, NULL,    0 },
   { "pending",           'a', 0, &f_pending,    NULL,      NULL,    1 },

=== modified file 'src/processarc.c'
--- src/processarc.c	2010-04-14 11:26:18 +0000
+++ src/processarc.c	2010-06-28 08:32:48 +0000
@@ -722,6 +722,10 @@
 	    "upgrade/downgrade", fnamevb.buf);
 
       for (cfile= newfileslist; cfile; cfile= cfile->next) {
+	/* If the file has been filtered then treat it as if it didn't exist
+	 * on the file system. */
+	if (cfile->namenode->flags & fnnf_filtered)
+	  continue;
 	if (!cfile->namenode->filestat) {
 	  struct stat tmp_stat;
 

