From 0f0254fa8ddce39ce4e98113e7050e1cd88ff884 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Tue, 21 Oct 2008 06:33:42 +0800 Subject: [PATCH 001/173] [MTD] [NAND] OMAP2: remove duplicated #include Removed duplicated #include in drivers/mtd/onenand/omap2.c. Signed-off-by: Huang Weiyi Signed-off-by: David Woodhouse --- drivers/mtd/onenand/omap2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index 8387e05daae2..e39b21d3e168 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include From f04de505e3fa322728d1a851e08bf7060b117743 Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Tue, 21 Oct 2008 13:25:51 +0100 Subject: [PATCH 002/173] [JFFS2] Fix build failure with !CONFIG_JFFS2_FS_WRITEBUFFER Build failure introduced by 5bf1723723487ddb0b9c9641b6559da96b27cc93 [JFFS2] Write buffer offset adjustment for NOR-ECC (Sibley) flash Signed-off-by: Steve Glendinning Signed-off-by: David Woodhouse --- fs/jffs2/nodemgmt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index 0875b60b4bf7..21a052915aa9 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -261,9 +261,11 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c) jffs2_sum_reset_collected(c->summary); /* reset collected summary */ +#ifdef CONFIG_JFFS2_FS_WRITEBUFFER /* adjust write buffer offset, else we get a non contiguous write bug */ if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len) c->wbuf_ofs = 0xffffffff; +#endif D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset)); From 6c87df37dcb9c6c33923707fa5191e0a65874d60 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 27 Oct 2008 22:38:27 +0300 Subject: [PATCH 003/173] proc: revert /proc/uptime to ->read_proc hook Turned out some VMware userspace does pread(2) on /proc/uptime, but seqfiles currently don't allow pread() resulting in -ESPIPE. Seqfiles in theory can do pread(), but this can be a long story, so revert to ->read_proc until then. http://bugzilla.kernel.org/show_bug.cgi?id=11856 Signed-off-by: Alexey Dobriyan --- fs/proc/uptime.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 0c10a0b3f146..df26aa88fa47 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -1,43 +1,45 @@ -#include #include #include #include -#include #include #include -static int uptime_proc_show(struct seq_file *m, void *v) +static int proc_calc_metrics(char *page, char **start, off_t off, + int count, int *eof, int len) +{ + if (len <= off + count) + *eof = 1; + *start = page + off; + len -= off; + if (len > count) + len = count; + if (len < 0) + len = 0; + return len; +} + +static int uptime_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) { struct timespec uptime; struct timespec idle; + int len; cputime_t idletime = cputime_add(init_task.utime, init_task.stime); do_posix_clock_monotonic_gettime(&uptime); monotonic_to_bootbased(&uptime); cputime_to_timespec(idletime, &idle); - seq_printf(m, "%lu.%02lu %lu.%02lu\n", + len = sprintf(page, "%lu.%02lu %lu.%02lu\n", (unsigned long) uptime.tv_sec, (uptime.tv_nsec / (NSEC_PER_SEC / 100)), (unsigned long) idle.tv_sec, (idle.tv_nsec / (NSEC_PER_SEC / 100))); - return 0; + return proc_calc_metrics(page, start, off, count, eof, len); } -static int uptime_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, uptime_proc_show, NULL); -} - -static const struct file_operations uptime_proc_fops = { - .open = uptime_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_uptime_init(void) { - proc_create("uptime", 0, NULL, &uptime_proc_fops); + create_proc_read_entry("uptime", 0, NULL, uptime_read_proc, NULL); return 0; } module_init(proc_uptime_init); From 65e082c9a33a6e9f24e9a713a7d38d11206d3c3d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 24 Oct 2008 17:18:10 -0400 Subject: [PATCH 004/173] build fix: CONFIG_DRM_I915=y && CONFIG_ACPI=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/gpu/drm/i915/i915_opregion.c:340: error: implicit declaration of function ‘register_acpi_notifier’ drivers/gpu/drm/i915/i915_opregion.c:361: error: implicit declaration of function ‘unregister_acpi_notifier’ Signed-off-by: Len Brown Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/Makefile | 3 ++- drivers/gpu/drm/i915/i915_drv.h | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 5ba78e4fd2b5..d8fb5d8ee7ea 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -3,13 +3,14 @@ # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. ccflags-y := -Iinclude/drm -i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o i915_opregion.o \ +i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \ i915_suspend.o \ i915_gem.o \ i915_gem_debug.o \ i915_gem_proc.o \ i915_gem_tiling.o +i915-$(CONFIG_ACPI) += i915_opregion.o i915-$(CONFIG_COMPAT) += i915_ioc32.o obj-$(CONFIG_DRM_I915) += i915.o diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f20ffe17df71..901e80cf5813 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -539,11 +539,18 @@ extern int i915_restore_state(struct drm_device *dev); extern int i915_save_state(struct drm_device *dev); extern int i915_restore_state(struct drm_device *dev); +#ifdef CONFIG_ACPI /* i915_opregion.c */ extern int intel_opregion_init(struct drm_device *dev); extern void intel_opregion_free(struct drm_device *dev); extern void opregion_asle_intr(struct drm_device *dev); extern void opregion_enable_asle(struct drm_device *dev); +#else +static inline int intel_opregion_init(struct drm_device *dev) { return 0; } +static inline void intel_opregion_free(struct drm_device *dev) { return; } +static inline void opregion_asle_intr(struct drm_device *dev) { return; } +static inline void opregion_enable_asle(struct drm_device *dev) { return; } +#endif /** * Lock test for when it's just for synchronization of ring access. From 4e270e9b8a9d246290f3901f1fb6c5efdb734ddf Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 28 Oct 2008 07:48:34 +1000 Subject: [PATCH 005/173] drm/radeon: fixup further bus mastering confusion. rs400/480 are like previous chips not like rs6xx chips. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_cp.c | 15 ++++++++------- drivers/gpu/drm/radeon/radeon_drv.h | 12 ++++++------ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c index 59a2132a8f57..073894824e6b 100644 --- a/drivers/gpu/drm/radeon/radeon_cp.c +++ b/drivers/gpu/drm/radeon/radeon_cp.c @@ -653,15 +653,16 @@ static void radeon_cp_init_ring_buffer(struct drm_device * dev, RADEON_WRITE(RADEON_SCRATCH_UMSK, 0x7); /* Turn on bus mastering */ - if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS400) || - ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS690) || + if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS690) || ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS740)) { - /* rs400, rs690/rs740 */ - tmp = RADEON_READ(RADEON_BUS_CNTL) & ~RS400_BUS_MASTER_DIS; + /* rs600/rs690/rs740 */ + tmp = RADEON_READ(RADEON_BUS_CNTL) & ~RS600_BUS_MASTER_DIS; RADEON_WRITE(RADEON_BUS_CNTL, tmp); - } else if (!(((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV380) || - ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R423))) { - /* r1xx, r2xx, r300, r(v)350, r420/r481, rs480 */ + } else if (((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV350) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R420) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS400) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS480)) { + /* r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */ tmp = RADEON_READ(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS; RADEON_WRITE(RADEON_BUS_CNTL, tmp); } /* PCIE cards appears to not need this */ diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index 4dbb813910c3..02f5575ba395 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -447,12 +447,12 @@ extern int r300_do_cp_cmdbuf(struct drm_device *dev, * handling, not bus mastering itself. */ #define RADEON_BUS_CNTL 0x0030 -/* r1xx, r2xx, r300, r(v)350, r420/r481, rs480 */ +/* r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */ # define RADEON_BUS_MASTER_DIS (1 << 6) -/* rs400, rs690/rs740 */ -# define RS400_BUS_MASTER_DIS (1 << 14) -# define RS400_MSI_REARM (1 << 20) -/* see RS480_MSI_REARM in AIC_CNTL for rs480 */ +/* rs600/rs690/rs740 */ +# define RS600_BUS_MASTER_DIS (1 << 14) +# define RS600_MSI_REARM (1 << 20) +/* see RS400_MSI_REARM in AIC_CNTL for rs480 */ #define RADEON_BUS_CNTL1 0x0034 # define RADEON_PMI_BM_DIS (1 << 2) @@ -937,7 +937,7 @@ extern int r300_do_cp_cmdbuf(struct drm_device *dev, #define RADEON_AIC_CNTL 0x01d0 # define RADEON_PCIGART_TRANSLATE_EN (1 << 0) -# define RS480_MSI_REARM (1 << 3) +# define RS400_MSI_REARM (1 << 3) #define RADEON_AIC_STAT 0x01d4 #define RADEON_AIC_PT_BASE 0x01d8 #define RADEON_AIC_LO_ADDR 0x01dc From edf1ae403896cb7750800508b14996ba6be39a53 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 29 Oct 2008 00:47:57 +0000 Subject: [PATCH 006/173] [CIFS] Reduce number of socket retries in large write path CIFS in some heavy stress conditions cifs could get EAGAIN repeatedly in smb_send2 which led to repeated retries and eventually failure of large writes which could lead to data corruption. There are three changes that were suggested by various network developers: 1) convert cifs from non-blocking to blocking tcp sendmsg (we left in the retry on failure) 2) change cifs to not set sendbuf and rcvbuf size for the socket (let tcp autotune the buffer sizes since that works much better in the TCP stack now) 3) if we have a partial frame sent in smb_send2, mark the tcp session as invalid (close the socket and reconnect) so we do not corrupt the remaining part of the SMB with the beginning of the next SMB. This does not appear to hurt performance measurably and has been run in various scenarios, but it definately removes a corruption that we were seeing in some high stress test cases. Acked-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/CHANGES | 6 +++++- fs/cifs/cifsglob.h | 2 ++ fs/cifs/cifsproto.h | 2 +- fs/cifs/connect.c | 50 +++++++++++++++++++++++++++++++++------------ fs/cifs/transport.c | 41 ++++++++++++++++++++++++++++--------- 5 files changed, 76 insertions(+), 25 deletions(-) diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 8f528ea24c48..8855331b2fba 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -4,7 +4,11 @@ Various fixes to make delete of open files behavior more predictable (when delete of an open file fails we mark the file as "delete-on-close" in a way that more servers accept, but only if we can first rename the file to a temporary name). Add experimental support for more safely -handling fcntl(F_SETLEASE). +handling fcntl(F_SETLEASE). Convert cifs to using blocking tcp +sends, and also let tcp autotune the socket send and receive buffers. +This reduces the number of EAGAIN errors returned by TCP/IP in +high stress workloads (and the number of retries on socket writes +when sending large SMBWriteX requests). Version 1.54 ------------ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index c791e5b5a914..1cb1189f24e0 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -141,6 +141,8 @@ struct TCP_Server_Info { char versionMajor; char versionMinor; bool svlocal:1; /* local server or remote */ + bool noblocksnd; /* use blocking sendmsg */ + bool noautotune; /* do not autotune send buf sizes */ atomic_t socketUseCount; /* number of open cifs sessions on socket */ atomic_t inFlight; /* number of requests on the wire to server */ #ifdef CONFIG_CIFS_STATS2 diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 0cff7fe986e8..6f21ecb85ce5 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -36,7 +36,7 @@ extern void cifs_buf_release(void *); extern struct smb_hdr *cifs_small_buf_get(void); extern void cifs_small_buf_release(void *); extern int smb_send(struct socket *, struct smb_hdr *, - unsigned int /* length */ , struct sockaddr *); + unsigned int /* length */ , struct sockaddr *, bool); extern unsigned int _GetXid(void); extern void _FreeXid(unsigned int); #define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current->fsuid)); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 71b7661e2260..e9f9248cb3fe 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -92,6 +92,8 @@ struct smb_vol { bool seal:1; /* request transport encryption on share */ bool nodfs:1; /* Do not request DFS, even if available */ bool local_lease:1; /* check leases only on local system, not remote */ + bool noblocksnd:1; + bool noautotune:1; unsigned int rsize; unsigned int wsize; unsigned int sockopt; @@ -102,9 +104,11 @@ struct smb_vol { static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, char *netb_name, - char *server_netb_name); + char *server_netb_name, + bool noblocksnd, + bool nosndbuf); /* ipv6 never set sndbuf size */ static int ipv6_connect(struct sockaddr_in6 *psin_server, - struct socket **csocket); + struct socket **csocket, bool noblocksnd); /* @@ -191,12 +195,13 @@ cifs_reconnect(struct TCP_Server_Info *server) try_to_freeze(); if (server->protocolType == IPV6) { rc = ipv6_connect(&server->addr.sockAddr6, - &server->ssocket); + &server->ssocket, server->noautotune); } else { rc = ipv4_connect(&server->addr.sockAddr, &server->ssocket, server->workstation_RFC1001_name, - server->server_RFC1001_name); + server->server_RFC1001_name, + server->noblocksnd, server->noautotune); } if (rc) { cFYI(1, ("reconnect error %d", rc)); @@ -1192,6 +1197,10 @@ cifs_parse_mount_options(char *options, const char *devname, /* ignore */ } else if (strnicmp(data, "rw", 2) == 0) { vol->rw = true; + } else if (strnicmp(data, "noblocksend", 11) == 0) { + vol->noblocksnd = 1; + } else if (strnicmp(data, "noautotune", 10) == 0) { + vol->noautotune = 1; } else if ((strnicmp(data, "suid", 4) == 0) || (strnicmp(data, "nosuid", 6) == 0) || (strnicmp(data, "exec", 4) == 0) || @@ -1518,7 +1527,8 @@ static void rfc1002mangle(char *target, char *source, unsigned int length) static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, - char *netbios_name, char *target_name) + char *netbios_name, char *target_name, + bool noblocksnd, bool noautotune) { int rc = 0; int connected = 0; @@ -1590,11 +1600,16 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, (*csocket)->sk->sk_sndbuf, (*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo)); (*csocket)->sk->sk_rcvtimeo = 7 * HZ; + if (!noblocksnd) + (*csocket)->sk->sk_sndtimeo = 3 * HZ; + /* make the bufsizes depend on wsize/rsize and max requests */ - if ((*csocket)->sk->sk_sndbuf < (200 * 1024)) - (*csocket)->sk->sk_sndbuf = 200 * 1024; - if ((*csocket)->sk->sk_rcvbuf < (140 * 1024)) - (*csocket)->sk->sk_rcvbuf = 140 * 1024; + if (noautotune) { + if ((*csocket)->sk->sk_sndbuf < (200 * 1024)) + (*csocket)->sk->sk_sndbuf = 200 * 1024; + if ((*csocket)->sk->sk_rcvbuf < (140 * 1024)) + (*csocket)->sk->sk_rcvbuf = 140 * 1024; + } /* send RFC1001 sessinit */ if (psin_server->sin_port == htons(RFC1001_PORT)) { @@ -1631,7 +1646,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, /* sizeof RFC1002_SESSION_REQUEST with no scope */ smb_buf->smb_buf_length = 0x81000044; rc = smb_send(*csocket, smb_buf, 0x44, - (struct sockaddr *)psin_server); + (struct sockaddr *)psin_server, noblocksnd); kfree(ses_init_buf); msleep(1); /* RFC1001 layer in at least one server requires very short break before negprot @@ -1651,7 +1666,8 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, } static int -ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) +ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket, + bool noblocksnd) { int rc = 0; int connected = 0; @@ -1720,6 +1736,9 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) the default. sock_setsockopt not used because it expects user space buffer */ (*csocket)->sk->sk_rcvtimeo = 7 * HZ; + if (!noblocksnd) + (*csocket)->sk->sk_sndtimeo = 3 * HZ; + return rc; } @@ -1983,11 +2002,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, cFYI(1, ("attempting ipv6 connect")); /* BB should we allow ipv6 on port 139? */ /* other OS never observed in Wild doing 139 with v6 */ - rc = ipv6_connect(&sin_server6, &csocket); + rc = ipv6_connect(&sin_server6, &csocket, + volume_info.noblocksnd); } else rc = ipv4_connect(&sin_server, &csocket, volume_info.source_rfc1001_name, - volume_info.target_rfc1001_name); + volume_info.target_rfc1001_name, + volume_info.noblocksnd, + volume_info.noautotune); if (rc < 0) { cERROR(1, ("Error connecting to IPv4 socket. " "Aborting operation")); @@ -2002,6 +2024,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, sock_release(csocket); goto out; } else { + srvTcp->noblocksnd = volume_info.noblocksnd; + srvTcp->noautotune = volume_info.noautotune; memcpy(&srvTcp->addr.sockAddr, &sin_server, sizeof(struct sockaddr_in)); atomic_set(&srvTcp->inFlight, 0); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index bf0e6d8e382a..ba4d66644ebf 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -161,7 +161,7 @@ void DeleteTconOplockQEntries(struct cifsTconInfo *tcon) int smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, - unsigned int smb_buf_length, struct sockaddr *sin) + unsigned int smb_buf_length, struct sockaddr *sin, bool noblocksnd) { int rc = 0; int i = 0; @@ -178,7 +178,10 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, smb_msg.msg_namelen = sizeof(struct sockaddr); smb_msg.msg_control = NULL; smb_msg.msg_controllen = 0; - smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ + if (noblocksnd) + smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; + else + smb_msg.msg_flags = MSG_NOSIGNAL; /* smb header is converted in header_assemble. bcc and rest of SMB word area, and byte area if necessary, is converted to littleendian in @@ -229,8 +232,8 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, } static int -smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, - struct sockaddr *sin) +smb_send2(struct TCP_Server_Info *server, struct kvec *iov, int n_vec, + struct sockaddr *sin, bool noblocksnd) { int rc = 0; int i = 0; @@ -240,6 +243,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, unsigned int total_len; int first_vec = 0; unsigned int smb_buf_length = smb_buffer->smb_buf_length; + struct socket *ssocket = server->ssocket; if (ssocket == NULL) return -ENOTSOCK; /* BB eventually add reconnect code here */ @@ -248,7 +252,10 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, smb_msg.msg_namelen = sizeof(struct sockaddr); smb_msg.msg_control = NULL; smb_msg.msg_controllen = 0; - smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ + if (noblocksnd) + smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; + else + smb_msg.msg_flags = MSG_NOSIGNAL; /* smb header is converted in header_assemble. bcc and rest of SMB word area, and byte area if necessary, is converted to littleendian in @@ -312,6 +319,16 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, i = 0; /* in case we get ENOSPC on the next send */ } + if ((total_len > 0) && (total_len != smb_buf_length + 4)) { + cFYI(1, ("partial send (%d remaining), terminating session", + total_len)); + /* If we have only sent part of an SMB then the next SMB + could be taken as the remainder of this one. We need + to kill the socket so the server throws away the partial + SMB */ + server->tcpStatus = CifsNeedReconnect; + } + if (rc < 0) { cERROR(1, ("Error %d sending data on socket to server", rc)); } else @@ -518,8 +535,9 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, #ifdef CONFIG_CIFS_STATS2 atomic_inc(&ses->server->inSend); #endif - rc = smb_send2(ses->server->ssocket, iov, n_vec, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + rc = smb_send2(ses->server, iov, n_vec, + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); #ifdef CONFIG_CIFS_STATS2 atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; @@ -711,7 +729,8 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, atomic_inc(&ses->server->inSend); #endif rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); #ifdef CONFIG_CIFS_STATS2 atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; @@ -851,7 +870,8 @@ send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf, return rc; } rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); up(&ses->server->tcpSem); return rc; } @@ -941,7 +961,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, atomic_inc(&ses->server->inSend); #endif rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); #ifdef CONFIG_CIFS_STATS2 atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; From ae9b9403644f3ecc76867af042e7e1cfd5c099d0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 30 Oct 2008 17:43:57 +0100 Subject: [PATCH 007/173] AMD IOMMU: fix detection of NP capable IOMMUs This patch changes the code to use IOMMU_CAP_NPCACHE as a shift and not as a mask. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 3b346c6f5514..38e88d40ab10 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -50,7 +50,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, /* returns !0 if the IOMMU is caching non-present entries in its TLB */ static int iommu_has_npcache(struct amd_iommu *iommu) { - return iommu->cap & IOMMU_CAP_NPCACHE; + return iommu->cap & (1UL << IOMMU_CAP_NPCACHE); } /**************************************************************************** From 61de800d33af585cb7e6f27b5cdd51029c6855cb Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 30 Oct 2008 20:15:22 +0000 Subject: [PATCH 008/173] [CIFS] fix error in smb_send2 smb_send2 exit logic was strange, and with the previous change could cause us to fail large smb writes when all of the smb was not sent as one chunk. Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 2 +- fs/cifs/file.c | 2 +- fs/cifs/transport.c | 7 +++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 843a85fb8b9a..d5eac48fc415 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1536,7 +1536,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, __u32 bytes_sent; __u16 byte_count; - /* cFYI(1,("write at %lld %d bytes",offset,count));*/ + /* cFYI(1, ("write at %lld %d bytes", offset, count));*/ if (tcon->ses == NULL) return -ECONNABORTED; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 62d8bd8f14c0..ead1a3bb0256 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1824,7 +1824,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, pTcon = cifs_sb->tcon; pagevec_init(&lru_pvec, 0); - cFYI(DBG2, ("rpages: num pages %d", num_pages)); + cFYI(DBG2, ("rpages: num pages %d", num_pages)); for (i = 0; i < num_pages; ) { unsigned contig_pages; struct page *tmp_page; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index ba4d66644ebf..ff8243a8fe3e 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -290,8 +290,11 @@ smb_send2(struct TCP_Server_Info *server, struct kvec *iov, int n_vec, if (rc < 0) break; - if (rc >= total_len) { - WARN_ON(rc > total_len); + if (rc == total_len) { + total_len = 0; + break; + } else if (rc > total_len) { + cERROR(1, ("sent %d requested %d", rc, total_len)); break; } if (rc == 0) { From a1f64819fe9f136c98d572794a35a7e377c951ef Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 30 Oct 2008 01:41:56 +0100 Subject: [PATCH 009/173] firewire: struct device - replace bus_id with dev_name(), dev_set_name() Acked-by: Greg Kroah-Hartman Signed-off-by: Kay Sievers Signed-off-by: Stefan Richter --- drivers/firewire/fw-device.c | 14 ++++++-------- drivers/firewire/fw-ohci.c | 2 +- drivers/firewire/fw-sbp2.c | 2 +- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/firewire/fw-device.c b/drivers/firewire/fw-device.c index 3fccdd484100..6b9be42c7b98 100644 --- a/drivers/firewire/fw-device.c +++ b/drivers/firewire/fw-device.c @@ -587,8 +587,7 @@ static void create_units(struct fw_device *device) unit->device.bus = &fw_bus_type; unit->device.type = &fw_unit_type; unit->device.parent = &device->device; - snprintf(unit->device.bus_id, sizeof(unit->device.bus_id), - "%s.%d", device->device.bus_id, i++); + dev_set_name(&unit->device, "%s.%d", dev_name(&device->device), i++); init_fw_attribute_group(&unit->device, fw_unit_attributes, @@ -711,8 +710,7 @@ static void fw_device_init(struct work_struct *work) device->device.type = &fw_device_type; device->device.parent = device->card->device; device->device.devt = MKDEV(fw_cdev_major, minor); - snprintf(device->device.bus_id, sizeof(device->device.bus_id), - "fw%d", minor); + dev_set_name(&device->device, "fw%d", minor); init_fw_attribute_group(&device->device, fw_device_attributes, @@ -741,13 +739,13 @@ static void fw_device_init(struct work_struct *work) if (device->config_rom_retries) fw_notify("created device %s: GUID %08x%08x, S%d00, " "%d config ROM retries\n", - device->device.bus_id, + dev_name(&device->device), device->config_rom[3], device->config_rom[4], 1 << device->max_speed, device->config_rom_retries); else fw_notify("created device %s: GUID %08x%08x, S%d00\n", - device->device.bus_id, + dev_name(&device->device), device->config_rom[3], device->config_rom[4], 1 << device->max_speed); device->config_rom_retries = 0; @@ -883,12 +881,12 @@ static void fw_device_refresh(struct work_struct *work) FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN) goto gone; - fw_notify("refreshed device %s\n", device->device.bus_id); + fw_notify("refreshed device %s\n", dev_name(&device->device)); device->config_rom_retries = 0; goto out; give_up: - fw_notify("giving up on refresh of device %s\n", device->device.bus_id); + fw_notify("giving up on refresh of device %s\n", dev_name(&device->device)); gone: atomic_set(&device->state, FW_DEVICE_SHUTDOWN); fw_device_shutdown(work); diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c index 8e16bfbdcb3d..46610b090415 100644 --- a/drivers/firewire/fw-ohci.c +++ b/drivers/firewire/fw-ohci.c @@ -2468,7 +2468,7 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent) goto fail_self_id; fw_notify("Added fw-ohci device %s, OHCI version %x.%x\n", - dev->dev.bus_id, version >> 16, version & 0xff); + dev_name(&dev->dev), version >> 16, version & 0xff); return 0; fail_self_id: diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index d334cac5e1fc..97df6dac3a82 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -1135,7 +1135,7 @@ static int sbp2_probe(struct device *dev) tgt->unit = unit; kref_init(&tgt->kref); INIT_LIST_HEAD(&tgt->lu_list); - tgt->bus_id = unit->device.bus_id; + tgt->bus_id = dev_name(&unit->device); tgt->guid = (u64)device->config_rom[3] << 32 | device->config_rom[4]; if (fw_device_enable_phys_dma(device) < 0) From 233976e539a93de1320fc7625b24076b1f9e2c9c Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 30 Oct 2008 01:49:20 +0100 Subject: [PATCH 010/173] ieee1394: struct device - replace bus_id with dev_name(), dev_set_name() Acked-by: Greg Kroah-Hartman Signed-off-by: Kay Sievers Signed-off-by: Stefan Richter --- drivers/ieee1394/hosts.c | 4 ++-- drivers/ieee1394/nodemgr.c | 14 +++++--------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/ieee1394/hosts.c b/drivers/ieee1394/hosts.c index 8dd09d850419..237d0c9d69c6 100644 --- a/drivers/ieee1394/hosts.c +++ b/drivers/ieee1394/hosts.c @@ -155,11 +155,11 @@ struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra, memcpy(&h->device, &nodemgr_dev_template_host, sizeof(h->device)); h->device.parent = dev; set_dev_node(&h->device, dev_to_node(dev)); - snprintf(h->device.bus_id, BUS_ID_SIZE, "fw-host%d", h->id); + dev_set_name(&h->device, "fw-host%d", h->id); h->host_dev.parent = &h->device; h->host_dev.class = &hpsb_host_class; - snprintf(h->host_dev.bus_id, BUS_ID_SIZE, "fw-host%d", h->id); + dev_set_name(&h->host_dev, "fw-host%d", h->id); if (device_register(&h->device)) goto fail; diff --git a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c index 2376b729e876..9e39f73282ee 100644 --- a/drivers/ieee1394/nodemgr.c +++ b/drivers/ieee1394/nodemgr.c @@ -826,13 +826,11 @@ static struct node_entry *nodemgr_create_node(octlet_t guid, memcpy(&ne->device, &nodemgr_dev_template_ne, sizeof(ne->device)); ne->device.parent = &host->device; - snprintf(ne->device.bus_id, BUS_ID_SIZE, "%016Lx", - (unsigned long long)(ne->guid)); + dev_set_name(&ne->device, "%016Lx", (unsigned long long)(ne->guid)); ne->node_dev.parent = &ne->device; ne->node_dev.class = &nodemgr_ne_class; - snprintf(ne->node_dev.bus_id, BUS_ID_SIZE, "%016Lx", - (unsigned long long)(ne->guid)); + dev_set_name(&ne->node_dev, "%016Lx", (unsigned long long)(ne->guid)); if (device_register(&ne->device)) goto fail_devreg; @@ -932,13 +930,11 @@ static void nodemgr_register_device(struct node_entry *ne, ud->device.parent = parent; - snprintf(ud->device.bus_id, BUS_ID_SIZE, "%s-%u", - ne->device.bus_id, ud->id); + dev_set_name(&ud->device, "%s-%u", dev_name(&ne->device), ud->id); ud->unit_dev.parent = &ud->device; ud->unit_dev.class = &nodemgr_ud_class; - snprintf(ud->unit_dev.bus_id, BUS_ID_SIZE, "%s-%u", - ne->device.bus_id, ud->id); + dev_set_name(&ud->unit_dev, "%s-%u", dev_name(&ne->device), ud->id); if (device_register(&ud->device)) goto fail_devreg; @@ -953,7 +949,7 @@ static void nodemgr_register_device(struct node_entry *ne, fail_classdevreg: device_unregister(&ud->device); fail_devreg: - HPSB_ERR("Failed to create unit %s", ud->device.bus_id); + HPSB_ERR("Failed to create unit %s", dev_name(&ud->device)); } From 638570b54346f140bc09b986d93e76025d35180f Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 26 Oct 2008 12:03:37 +0100 Subject: [PATCH 011/173] ieee1394: raw1394: fix possible deadlock in multithreaded clients Regression in 2.6.28-rc1: When I added the new state_mutex which prevents corruption of raw1394's internal state when accessed by multithreaded client applications, the following possible though highly unlikely deadlock slipped in: Thread A: Thread B: - acquire mmap_sem - raw1394_write() or raw1394_ioctl() - raw1394_mmap() - acquire state_mutex - acquire state_mutex - copy_to/from_user(), possible page fault: acquire mmap_sem The simplest fix is to use mutex_trylock() instead of mutex_lock() in raw1394_mmap(). This changes the behavior under contention in a way which is visible to userspace clients. However, since multithreaded access was entirely buggy before state_mutex was added and libraw1394's documentation advised application programmers to use a handle only in a single thread, this change in behaviour should not be an issue in practice at all. Since we have to use mutex_trylock() in raw1394_mmap() regardless whether /dev/raw1394 was opened with O_NONBLOCK or not, we now use mutex_trylock() unconditionally everywhere for state_mutex, just to have consistent behavior. Reported-by: Johannes Weiner Signed-off-by: Stefan Richter --- drivers/ieee1394/raw1394.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c index 2cf4ae75beca..4bdfff0a9191 100644 --- a/drivers/ieee1394/raw1394.c +++ b/drivers/ieee1394/raw1394.c @@ -2268,7 +2268,8 @@ static ssize_t raw1394_write(struct file *file, const char __user * buffer, return -EFAULT; } - mutex_lock(&fi->state_mutex); + if (!mutex_trylock(&fi->state_mutex)) + return -EAGAIN; switch (fi->state) { case opened: @@ -2548,7 +2549,8 @@ static int raw1394_mmap(struct file *file, struct vm_area_struct *vma) struct file_info *fi = file->private_data; int ret; - mutex_lock(&fi->state_mutex); + if (!mutex_trylock(&fi->state_mutex)) + return -EAGAIN; if (fi->iso_state == RAW1394_ISO_INACTIVE) ret = -EINVAL; @@ -2669,7 +2671,8 @@ static long raw1394_ioctl(struct file *file, unsigned int cmd, break; } - mutex_lock(&fi->state_mutex); + if (!mutex_trylock(&fi->state_mutex)) + return -EAGAIN; switch (fi->iso_state) { case RAW1394_ISO_INACTIVE: From 8449fc3ae58bf8ee5acbd2280754cde67b5db128 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 26 Oct 2008 12:02:03 +0100 Subject: [PATCH 012/173] ieee1394: dv1394: fix possible deadlock in multithreaded clients Fix a possible though highly unlikely deadlock: Thread A: Thread B: - acquire mmap_sem - dv1394_ioctl/read/write() - dv1394_mmap() - acquire video->mtx - acquire video->mtx - copy_to/from_user(), possible page fault: acquire mmap_sem The simplest fix is to use mutex_trylock() instead of mutex_lock() in dv1394_mmap(). This changes the behavior under contention in a way which is visible to userspace clients. However, my guess is that no clients exist which use mmap vs. ioctl/read/write on the dv1394 character device file interface in concurrent threads. Reported-by: Johannes Weiner Signed-off-by: Stefan Richter --- drivers/ieee1394/dv1394.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/ieee1394/dv1394.c b/drivers/ieee1394/dv1394.c index df70f51279d8..53329972c7db 100644 --- a/drivers/ieee1394/dv1394.c +++ b/drivers/ieee1394/dv1394.c @@ -1270,8 +1270,14 @@ static int dv1394_mmap(struct file *file, struct vm_area_struct *vma) struct video_card *video = file_to_video_card(file); int retval = -EINVAL; - /* serialize mmap */ - mutex_lock(&video->mtx); + /* + * We cannot use the blocking variant mutex_lock here because .mmap + * is called with mmap_sem held, while .ioctl, .read, .write acquire + * video->mtx and subsequently call copy_to/from_user which will + * grab mmap_sem in case of a page fault. + */ + if (!mutex_trylock(&video->mtx)) + return -EAGAIN; if ( ! video_card_initialized(video) ) { retval = do_dv1394_init_default(video); From fd9409343521eac22b6ed51686128a643c7c976b Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 30 Oct 2008 19:37:09 -0700 Subject: [PATCH 013/173] x86: add iomap_atomic*()/iounmap_atomic() on 32-bit using fixmaps Impact: introduce new APIs, separate kmap code from CONFIG_HIGHMEM This takes the code used for CONFIG_HIGHMEM memory mappings except that it's designed for dynamic IO resource mapping. These fixmaps are available even with CONFIG_HIGHMEM turned off. Signed-off-by: Keith Packard Signed-off-by: Eric Anholt Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fixmap.h | 4 +++ arch/x86/include/asm/fixmap_32.h | 4 --- arch/x86/include/asm/highmem.h | 5 +-- arch/x86/mm/Makefile | 2 +- arch/x86/mm/init_32.c | 3 +- arch/x86/mm/iomap_32.c | 59 ++++++++++++++++++++++++++++++++ include/asm-x86/iomap.h | 30 ++++++++++++++++ 7 files changed, 96 insertions(+), 11 deletions(-) create mode 100644 arch/x86/mm/iomap_32.c create mode 100644 include/asm-x86/iomap.h diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 8668a94f850e..23696d44a0af 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -9,6 +9,10 @@ extern int fixmaps_set; +extern pte_t *kmap_pte; +extern pgprot_t kmap_prot; +extern pte_t *pkmap_page_table; + void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); void native_set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags); diff --git a/arch/x86/include/asm/fixmap_32.h b/arch/x86/include/asm/fixmap_32.h index 09f29ab5c139..c7115c1d7217 100644 --- a/arch/x86/include/asm/fixmap_32.h +++ b/arch/x86/include/asm/fixmap_32.h @@ -28,10 +28,8 @@ extern unsigned long __FIXADDR_TOP; #include #include #include -#ifdef CONFIG_HIGHMEM #include #include -#endif /* * Here we define all the compile-time 'special' virtual @@ -75,10 +73,8 @@ enum fixed_addresses { #ifdef CONFIG_X86_CYCLONE_TIMER FIX_CYCLONE_TIMER, /*cyclone timer register*/ #endif -#ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, -#endif #ifdef CONFIG_PCI_MMCONFIG FIX_PCIE_MCFG, #endif diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index a3b3b7c3027b..bf9276bea660 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -25,14 +25,11 @@ #include #include #include +#include /* declarations for highmem.c */ extern unsigned long highstart_pfn, highend_pfn; -extern pte_t *kmap_pte; -extern pgprot_t kmap_prot; -extern pte_t *pkmap_page_table; - /* * Right now we initialize only a single pte table. It can be extended * easily, subsequent pte tables have to be allocated in one physical diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 59f89b434b45..fea4565ff576 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,7 +1,7 @@ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ pat.o pgtable.o gup.o -obj-$(CONFIG_X86_32) += pgtable_32.o +obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8396868e82c5..c483f4242079 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -334,7 +334,6 @@ int devmem_is_allowed(unsigned long pagenr) return 0; } -#ifdef CONFIG_HIGHMEM pte_t *kmap_pte; pgprot_t kmap_prot; @@ -357,6 +356,7 @@ static void __init kmap_init(void) kmap_prot = PAGE_KERNEL; } +#ifdef CONFIG_HIGHMEM static void __init permanent_kmaps_init(pgd_t *pgd_base) { unsigned long vaddr; @@ -436,7 +436,6 @@ static void __init set_highmem_pages_init(void) #endif /* !CONFIG_NUMA */ #else -# define kmap_init() do { } while (0) # define permanent_kmaps_init(pgd_base) do { } while (0) # define set_highmem_pages_init() do { } while (0) #endif /* CONFIG_HIGHMEM */ diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c new file mode 100644 index 000000000000..d0151d8ce452 --- /dev/null +++ b/arch/x86/mm/iomap_32.c @@ -0,0 +1,59 @@ +/* + * Copyright © 2008 Ingo Molnar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + */ + +#include +#include + +/* Map 'pfn' using fixed map 'type' and protections 'prot' + */ +void * +iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) +{ + enum fixed_addresses idx; + unsigned long vaddr; + + pagefault_disable(); + + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte(kmap_pte-idx, pfn_pte(pfn, prot)); + arch_flush_lazy_mmu_mode(); + + return (void*) vaddr; +} +EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); + +void +iounmap_atomic(void *kvaddr, enum km_type type) +{ + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); + + /* + * Force other mappings to Oops if they'll try to access this pte + * without first remap it. Keeping stale mappings around is a bad idea + * also, in case the page changes cacheability attributes or becomes + * a protected page in a hypervisor. + */ + if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) + kpte_clear_flush(kmap_pte-idx, vaddr); + + arch_flush_lazy_mmu_mode(); + pagefault_enable(); +} +EXPORT_SYMBOL_GPL(iounmap_atomic); diff --git a/include/asm-x86/iomap.h b/include/asm-x86/iomap.h new file mode 100644 index 000000000000..c1f06289b14b --- /dev/null +++ b/include/asm-x86/iomap.h @@ -0,0 +1,30 @@ +/* + * Copyright © 2008 Ingo Molnar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + */ + +#include +#include +#include +#include +#include +#include + +void * +iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); + +void +iounmap_atomic(void *kvaddr, enum km_type type); From 9663f2e6a6cf3f82b06d8fb699b11b80f92553ba Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 30 Oct 2008 19:38:18 -0700 Subject: [PATCH 014/173] resources: add io-mapping functions to dynamically map large device apertures Impact: add new generic io_map_*() APIs Graphics devices have large PCI apertures which would consume a significant fraction of a 32-bit address space if mapped during driver initialization. Using ioremap at runtime is impractical as it is too slow. This new set of interfaces uses atomic mappings on 32-bit processors and a large static mapping on 64-bit processors to provide reasonable 32-bit performance and optimal 64-bit performance. The current implementation sits atop the io_map_atomic fixmap-based mechanism for 32-bit processors. This includes some editorial suggestions from Randy Dunlap for Documentation/io-mapping.txt Signed-off-by: Keith Packard Signed-off-by: Eric Anholt Signed-off-by: Ingo Molnar --- Documentation/io-mapping.txt | 76 ++++++++++++++++++++++ include/linux/io-mapping.h | 118 +++++++++++++++++++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 Documentation/io-mapping.txt create mode 100644 include/linux/io-mapping.h diff --git a/Documentation/io-mapping.txt b/Documentation/io-mapping.txt new file mode 100644 index 000000000000..cd2f726becc8 --- /dev/null +++ b/Documentation/io-mapping.txt @@ -0,0 +1,76 @@ +The io_mapping functions in linux/io-mapping.h provide an abstraction for +efficiently mapping small regions of an I/O device to the CPU. The initial +usage is to support the large graphics aperture on 32-bit processors where +ioremap_wc cannot be used to statically map the entire aperture to the CPU +as it would consume too much of the kernel address space. + +A mapping object is created during driver initialization using + + struct io_mapping *io_mapping_create_wc(unsigned long base, + unsigned long size) + + 'base' is the bus address of the region to be made + mappable, while 'size' indicates how large a mapping region to + enable. Both are in bytes. + + This _wc variant provides a mapping which may only be used + with the io_mapping_map_atomic_wc or io_mapping_map_wc. + +With this mapping object, individual pages can be mapped either atomically +or not, depending on the necessary scheduling environment. Of course, atomic +maps are more efficient: + + void *io_mapping_map_atomic_wc(struct io_mapping *mapping, + unsigned long offset) + + 'offset' is the offset within the defined mapping region. + Accessing addresses beyond the region specified in the + creation function yields undefined results. Using an offset + which is not page aligned yields an undefined result. The + return value points to a single page in CPU address space. + + This _wc variant returns a write-combining map to the + page and may only be used with mappings created by + io_mapping_create_wc + + Note that the task may not sleep while holding this page + mapped. + + void io_mapping_unmap_atomic(void *vaddr) + + 'vaddr' must be the the value returned by the last + io_mapping_map_atomic_wc call. This unmaps the specified + page and allows the task to sleep once again. + +If you need to sleep while holding the lock, you can use the non-atomic +variant, although they may be significantly slower. + + void *io_mapping_map_wc(struct io_mapping *mapping, + unsigned long offset) + + This works like io_mapping_map_atomic_wc except it allows + the task to sleep while holding the page mapped. + + void io_mapping_unmap(void *vaddr) + + This works like io_mapping_unmap_atomic, except it is used + for pages mapped with io_mapping_map_wc. + +At driver close time, the io_mapping object must be freed: + + void io_mapping_free(struct io_mapping *mapping) + +Current Implementation: + +The initial implementation of these functions uses existing mapping +mechanisms and so provides only an abstraction layer and no new +functionality. + +On 64-bit processors, io_mapping_create_wc calls ioremap_wc for the whole +range, creating a permanent kernel-visible mapping to the resource. The +map_atomic and map functions add the requested offset to the base of the +virtual address returned by ioremap_wc. + +On 32-bit processors, io_mapping_map_atomic_wc uses io_map_atomic_prot_pfn, +which uses the fixmaps to get us a mapping to a page using an atomic fashion. +For io_mapping_map_wc, ioremap_wc() is used to get a mapping of the region. diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h new file mode 100644 index 000000000000..1b566993db6e --- /dev/null +++ b/include/linux/io-mapping.h @@ -0,0 +1,118 @@ +/* + * Copyright © 2008 Keith Packard + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef _LINUX_IO_MAPPING_H +#define _LINUX_IO_MAPPING_H + +#include +#include +#include +#include + +/* + * The io_mapping mechanism provides an abstraction for mapping + * individual pages from an io device to the CPU in an efficient fashion. + * + * See Documentation/io_mapping.txt + */ + +/* this struct isn't actually defined anywhere */ +struct io_mapping; + +#ifdef CONFIG_X86_64 + +/* Create the io_mapping object*/ +static inline struct io_mapping * +io_mapping_create_wc(unsigned long base, unsigned long size) +{ + return (struct io_mapping *) ioremap_wc(base, size); +} + +static inline void +io_mapping_free(struct io_mapping *mapping) +{ + iounmap(mapping); +} + +/* Atomic map/unmap */ +static inline void * +io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) +{ + return ((char *) mapping) + offset; +} + +static inline void +io_mapping_unmap_atomic(void *vaddr) +{ +} + +/* Non-atomic map/unmap */ +static inline void * +io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) +{ + return ((char *) mapping) + offset; +} + +static inline void +io_mapping_unmap(void *vaddr) +{ +} + +#endif /* CONFIG_X86_64 */ + +#ifdef CONFIG_X86_32 +static inline struct io_mapping * +io_mapping_create_wc(unsigned long base, unsigned long size) +{ + return (struct io_mapping *) base; +} + +static inline void +io_mapping_free(struct io_mapping *mapping) +{ +} + +/* Atomic map/unmap */ +static inline void * +io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) +{ + offset += (unsigned long) mapping; + return iomap_atomic_prot_pfn(offset >> PAGE_SHIFT, KM_USER0, + __pgprot(__PAGE_KERNEL_WC)); +} + +static inline void +io_mapping_unmap_atomic(void *vaddr) +{ + iounmap_atomic(vaddr, KM_USER0); +} + +static inline void * +io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) +{ + offset += (unsigned long) mapping; + return ioremap_wc(offset, PAGE_SIZE); +} + +static inline void +io_mapping_unmap(void *vaddr) +{ + iounmap(vaddr); +} +#endif /* CONFIG_X86_32 */ + +#endif /* _LINUX_IO_MAPPING_H */ From 0839ccb8ac6a9e2d5e175a4ae9c82b5c574d510d Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 30 Oct 2008 19:38:48 -0700 Subject: [PATCH 015/173] i915: use io-mapping interfaces instead of a variety of mapping kludges Impact: optimize/clean-up the IO mapping implementation of the i915 DRM driver Switch the i915 device aperture mapping to the io-mapping interface, taking advantage of the cleaner API to extend it across all of the mapping uses, including both pwrite and relocation updates. This dramatically improves performance on 64-bit kernels which were using the same slow path as 32-bit non-HIGHMEM kernels prior to this patch. Signed-off-by: Keith Packard Signed-off-by: Eric Anholt Signed-off-by: Ingo Molnar --- drivers/gpu/drm/i915/i915_drv.h | 3 + drivers/gpu/drm/i915/i915_gem.c | 174 +++++++++++++++----------------- 2 files changed, 83 insertions(+), 94 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f20ffe17df71..126b2f13258c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -31,6 +31,7 @@ #define _I915_DRV_H_ #include "i915_reg.h" +#include /* General customization: */ @@ -246,6 +247,8 @@ typedef struct drm_i915_private { struct { struct drm_mm gtt_space; + struct io_mapping *gtt_mapping; + /** * List of objects currently involved in rendering from the * ringbuffer. diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 17ae330ff269..61183b95b108 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -171,35 +171,50 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, return 0; } -/* - * Try to write quickly with an atomic kmap. Return true on success. - * - * If this fails (which includes a partial write), we'll redo the whole - * thing with the slow version. - * - * This is a workaround for the low performance of iounmap (approximate - * 10% cpu cost on normal 3D workloads). kmap_atomic on HIGHMEM kernels - * happens to let us map card memory without taking IPIs. When the vmap - * rework lands we should be able to dump this hack. +/* This is the fast write path which cannot handle + * page faults in the source data */ -static inline int fast_user_write(unsigned long pfn, char __user *user_data, - int l, int o) -{ -#ifdef CONFIG_HIGHMEM - unsigned long unwritten; - char *vaddr_atomic; - vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0); -#if WATCH_PWRITE - DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n", - i, o, l, pfn, vaddr_atomic); -#endif - unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o, user_data, l); - kunmap_atomic(vaddr_atomic, KM_USER0); - return !unwritten; -#else +static inline int +fast_user_write(struct io_mapping *mapping, + loff_t page_base, int page_offset, + char __user *user_data, + int length) +{ + char *vaddr_atomic; + unsigned long unwritten; + + vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); + unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset, + user_data, length); + io_mapping_unmap_atomic(vaddr_atomic); + if (unwritten) + return -EFAULT; + return 0; +} + +/* Here's the write path which can sleep for + * page faults + */ + +static inline int +slow_user_write(struct io_mapping *mapping, + loff_t page_base, int page_offset, + char __user *user_data, + int length) +{ + char __iomem *vaddr; + unsigned long unwritten; + + vaddr = io_mapping_map_wc(mapping, page_base); + if (vaddr == NULL) + return -EFAULT; + unwritten = __copy_from_user(vaddr + page_offset, + user_data, length); + io_mapping_unmap(vaddr); + if (unwritten) + return -EFAULT; return 0; -#endif } static int @@ -208,10 +223,12 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, struct drm_file *file_priv) { struct drm_i915_gem_object *obj_priv = obj->driver_private; + drm_i915_private_t *dev_priv = dev->dev_private; ssize_t remain; - loff_t offset; + loff_t offset, page_base; char __user *user_data; - int ret = 0; + int page_offset, page_length; + int ret; user_data = (char __user *) (uintptr_t) args->data_ptr; remain = args->size; @@ -235,57 +252,37 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, obj_priv->dirty = 1; while (remain > 0) { - unsigned long pfn; - int i, o, l; - /* Operation in this page * - * i = page number - * o = offset within page - * l = bytes to copy + * page_base = page offset within aperture + * page_offset = offset within page + * page_length = bytes to copy for this page */ - i = offset >> PAGE_SHIFT; - o = offset & (PAGE_SIZE-1); - l = remain; - if ((o + l) > PAGE_SIZE) - l = PAGE_SIZE - o; + page_base = (offset & ~(PAGE_SIZE-1)); + page_offset = offset & (PAGE_SIZE-1); + page_length = remain; + if ((page_offset + remain) > PAGE_SIZE) + page_length = PAGE_SIZE - page_offset; - pfn = (dev->agp->base >> PAGE_SHIFT) + i; + ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base, + page_offset, user_data, page_length); - if (!fast_user_write(pfn, user_data, l, o)) { - unsigned long unwritten; - char __iomem *vaddr; - - vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE); -#if WATCH_PWRITE - DRM_INFO("pwrite slow i %d o %d l %d " - "pfn %ld vaddr %p\n", - i, o, l, pfn, vaddr); -#endif - if (vaddr == NULL) { - ret = -EFAULT; + /* If we get a fault while copying data, then (presumably) our + * source page isn't available. In this case, use the + * non-atomic function + */ + if (ret) { + ret = slow_user_write (dev_priv->mm.gtt_mapping, + page_base, page_offset, + user_data, page_length); + if (ret) goto fail; - } - unwritten = __copy_from_user(vaddr + o, user_data, l); -#if WATCH_PWRITE - DRM_INFO("unwritten %ld\n", unwritten); -#endif - iounmap(vaddr); - if (unwritten) { - ret = -EFAULT; - goto fail; - } } - remain -= l; - user_data += l; - offset += l; + remain -= page_length; + user_data += page_length; + offset += page_length; } -#if WATCH_PWRITE && 1 - i915_gem_clflush_object(obj); - i915_gem_dump_object(obj, args->offset + args->size, __func__, ~0); - i915_gem_clflush_object(obj); -#endif fail: i915_gem_object_unpin(obj); @@ -1503,12 +1500,12 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, struct drm_i915_gem_exec_object *entry) { struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_relocation_entry reloc; struct drm_i915_gem_relocation_entry __user *relocs; struct drm_i915_gem_object *obj_priv = obj->driver_private; int i, ret; - uint32_t last_reloc_offset = -1; - void __iomem *reloc_page = NULL; + void __iomem *reloc_page; /* Choose the GTT offset for our buffer and put it there. */ ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); @@ -1631,26 +1628,11 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, * perform. */ reloc_offset = obj_priv->gtt_offset + reloc.offset; - if (reloc_page == NULL || - (last_reloc_offset & ~(PAGE_SIZE - 1)) != - (reloc_offset & ~(PAGE_SIZE - 1))) { - if (reloc_page != NULL) - iounmap(reloc_page); - - reloc_page = ioremap_wc(dev->agp->base + - (reloc_offset & - ~(PAGE_SIZE - 1)), - PAGE_SIZE); - last_reloc_offset = reloc_offset; - if (reloc_page == NULL) { - drm_gem_object_unreference(target_obj); - i915_gem_object_unpin(obj); - return -ENOMEM; - } - } - + reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, + (reloc_offset & + ~(PAGE_SIZE - 1))); reloc_entry = (uint32_t __iomem *)(reloc_page + - (reloc_offset & (PAGE_SIZE - 1))); + (reloc_offset & (PAGE_SIZE - 1))); reloc_val = target_obj_priv->gtt_offset + reloc.delta; #if WATCH_BUF @@ -1659,6 +1641,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, readl(reloc_entry), reloc_val); #endif writel(reloc_val, reloc_entry); + io_mapping_unmap_atomic(reloc_page); /* Write the updated presumed offset for this entry back out * to the user. @@ -1674,9 +1657,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, drm_gem_object_unreference(target_obj); } - if (reloc_page != NULL) - iounmap(reloc_page); - #if WATCH_BUF if (0) i915_gem_dump_object(obj, 128, __func__, ~0); @@ -2518,6 +2498,10 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, if (ret != 0) return ret; + dev_priv->mm.gtt_mapping = io_mapping_create_wc(dev->agp->base, + dev->agp->agp_info.aper_size + * 1024 * 1024); + mutex_lock(&dev->struct_mutex); BUG_ON(!list_empty(&dev_priv->mm.active_list)); BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); @@ -2535,11 +2519,13 @@ int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + drm_i915_private_t *dev_priv = dev->dev_private; int ret; ret = i915_gem_idle(dev); drm_irq_uninstall(dev); + io_mapping_free(dev_priv->mm.gtt_mapping); return ret; } From b27cf88e9592953ae292d05324887f2f44979433 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 31 Oct 2008 14:52:24 +0000 Subject: [PATCH 016/173] [JFFS2] Fix lack of locking in thread_should_wake() The thread_should_wake() function trawls through the list of 'very dirty' eraseblocks, determining whether the background GC thread should wake. Doing this without holding the appropriate locks is a bad idea. OLPC Trac #8615 Signed-off-by: David Woodhouse Cc: stable@kernel.org --- fs/jffs2/background.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index 8adebd3e43c6..3cceef4ad2b7 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -85,15 +85,15 @@ static int jffs2_garbage_collect_thread(void *_c) for (;;) { allow_signal(SIGHUP); again: + spin_lock(&c->erase_completion_lock); if (!jffs2_thread_should_wake(c)) { set_current_state (TASK_INTERRUPTIBLE); + spin_unlock(&c->erase_completion_lock); D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread sleeping...\n")); - /* Yes, there's a race here; we checked jffs2_thread_should_wake() - before setting current->state to TASK_INTERRUPTIBLE. But it doesn't - matter - We don't care if we miss a wakeup, because the GC thread - is only an optimisation anyway. */ schedule(); - } + } else + spin_unlock(&c->erase_completion_lock); + /* This thread is purely an optimisation. But if it runs when other things could be running, it actually makes things a From 4605b718e8b8f0dd3d811ddf90f630fd0835b7bf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 31 Oct 2008 14:18:24 +0100 Subject: [PATCH 017/173] ALSA: hda - Disable broken mic auto-muting in Realtek codes The recent addition of automatic mic-muting is broken in some cases. The code assumes that the pin nids <= 0x18, but the digital pins can be less than 0x18. Also, it assumes the front-mic being the internal mic, but it depends on the hardware implementation actually. Instead of complex case-fixes, better to disable the code as now. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4eceab9bd109..a80d57cbc352 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -829,6 +829,7 @@ static void alc_sku_automute(struct hda_codec *codec) spec->jack_present ? 0 : PIN_OUT); } +#if 0 /* it's broken in some acses -- temporarily disabled */ static void alc_mic_automute(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -849,6 +850,9 @@ static void alc_mic_automute(struct hda_codec *codec) snd_hda_codec_amp_stereo(codec, 0x0b, HDA_INPUT, capsrc_idx_fmic, HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0); } +#else +#define alc_mic_automute(codec) /* NOP */ +#endif /* disabled */ /* unsolicited event for HP jack sensing */ static void alc_sku_unsol_event(struct hda_codec *codec, unsigned int res) @@ -1058,12 +1062,14 @@ static void alc_subsystem_id(struct hda_codec *codec, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | ALC880_HP_EVENT); +#if 0 /* it's broken in some acses -- temporarily disabled */ if (spec->autocfg.input_pins[AUTO_PIN_MIC] && spec->autocfg.input_pins[AUTO_PIN_FRONT_MIC]) snd_hda_codec_write(codec, spec->autocfg.input_pins[AUTO_PIN_MIC], 0, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | ALC880_MIC_EVENT); +#endif /* disabled */ spec->unsol_event = alc_sku_unsol_event; } From ee956e090e114ede6542c76a7465ed6ce118ad52 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 31 Oct 2008 17:16:31 +0100 Subject: [PATCH 018/173] ALSA: hda - Add digital-mic for ALC269 auto-probe mode The digital mic wasn't detected properly for ALC269 auto-probing mode because of its widget number. Fixed now. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a80d57cbc352..d327a371595c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -12244,8 +12244,26 @@ static int alc269_auto_create_multi_out_ctls(struct alc_spec *spec, return 0; } -#define alc269_auto_create_analog_input_ctls \ - alc880_auto_create_analog_input_ctls +static int alc269_auto_create_analog_input_ctls(struct alc_spec *spec, + const struct auto_pin_cfg *cfg) +{ + int err; + + err = alc880_auto_create_analog_input_ctls(spec, cfg); + if (err < 0) + return err; + /* digital-mic input pin is excluded in alc880_auto_create..() + * because it's under 0x18 + */ + if (cfg->input_pins[AUTO_PIN_MIC] == 0x12 || + cfg->input_pins[AUTO_PIN_FRONT_MIC] == 0x12) { + struct hda_input_mux *imux = &spec->private_imux; + imux->items[imux->num_items].label = "Int Mic"; + imux->items[imux->num_items].index = 0x05; + imux->num_items++; + } + return 0; +} #ifdef CONFIG_SND_HDA_POWER_SAVE #define alc269_loopbacks alc880_loopbacks From 70d9d15833864e7120c3ffcfdbd6fa61f5f9726a Mon Sep 17 00:00:00 2001 From: Will Newton Date: Tue, 28 Oct 2008 10:52:36 +0000 Subject: [PATCH 019/173] drivers/net/smc911x.c: Fix lockdep warning on xmit. dev_kfree_skb should not be called with irqs disabled, use dev_kfree_skb_irq instead. The warning caused looks like this: ====================================================== [ INFO: hard-safe -> hard-unsafe lock order detected ] 2.6.28-rc1 #273 ------------------------------------------------------ swapper/0 [HC0[0]:SC1[2]:HE0:SE0] is trying to acquire: (clock-AF_INET){-..+}, at: [<4015c17c>] _sock_def_write_space+0x28/0xd8 and this task is already holding: (&lp->lock){++..}, at: [<4013f230>] _smc911x_hard_start_xmit+0x30/0x4b8 which would create a new lock dependency: (&lp->lock){++..} -> (clock-AF_INET){-..+} Signed-off-by: Will Newton Signed-off-by: Jeff Garzik --- drivers/net/smc911x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/smc911x.c b/drivers/net/smc911x.c index f59c7772f344..5051554ff05b 100644 --- a/drivers/net/smc911x.c +++ b/drivers/net/smc911x.c @@ -499,7 +499,7 @@ static void smc911x_hardware_send_pkt(struct net_device *dev) #else SMC_PUSH_DATA(lp, buf, len); dev->trans_start = jiffies; - dev_kfree_skb(skb); + dev_kfree_skb_irq(skb); #endif if (!lp->tx_throttle) { netif_wake_queue(dev); From 5a125c3c79167e78ba44efef03af7090ef28eeaf Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 22 Oct 2008 21:40:13 -0700 Subject: [PATCH 020/173] i915: Add GEM ioctl to get available aperture size. This will let userland know when to submit its batchbuffers, before they get too big to fit in the aperture. Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_dma.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 22 ++++++++++++++++++++++ include/drm/i915_drm.h | 13 +++++++++++++ 4 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 01de536e0211..256e22963ae4 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -960,6 +960,7 @@ struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF(DRM_I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, 0), DRM_IOCTL_DEF(DRM_I915_GEM_SET_TILING, i915_gem_set_tiling, 0), DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, 0), + DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, 0), }; int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 901e80cf5813..cc8a9f3f7a60 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -502,6 +502,8 @@ int i915_gem_set_tiling(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_get_tiling(struct drm_device *dev, void *data, struct drm_file *file_priv); +int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); void i915_gem_load(struct drm_device *dev); int i915_gem_proc_init(struct drm_minor *minor); void i915_gem_proc_cleanup(struct drm_minor *minor); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 17ae330ff269..c1733ac4a7f5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -79,6 +79,28 @@ i915_gem_init_ioctl(struct drm_device *dev, void *data, return 0; } +int +i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_get_aperture *args = data; + struct drm_i915_gem_object *obj_priv; + + if (!(dev->driver->driver_features & DRIVER_GEM)) + return -ENODEV; + + args->aper_size = dev->gtt_total; + args->aper_available_size = args->aper_size; + + list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) { + if (obj_priv->pin_count > 0) + args->aper_available_size -= obj_priv->obj->size; + } + + return 0; +} + /** * Creates a new mm object and returns a handle to it. diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index eb4b35031a55..152b34da927c 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -159,6 +159,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_GEM_SW_FINISH 0x20 #define DRM_I915_GEM_SET_TILING 0x21 #define DRM_I915_GEM_GET_TILING 0x22 +#define DRM_I915_GEM_GET_APERTURE 0x23 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -190,6 +191,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_SW_FINISH DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct drm_i915_gem_sw_finish) #define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling) #define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling) +#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -600,4 +602,15 @@ struct drm_i915_gem_get_tiling { uint32_t swizzle_mode; }; +struct drm_i915_gem_get_aperture { + /** Total size of the aperture used by i915_gem_execbuffer, in bytes */ + uint64_t aper_size; + + /** + * Available space in the aperture used by i915_gem_execbuffer, in + * bytes + */ + uint64_t aper_available_size; +}; + #endif /* _I915_DRM_H_ */ From f26ba1751145edbf52b2c89a40e389f2fbdfc1af Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 2 Nov 2008 16:11:01 +0000 Subject: [PATCH 021/173] udp: Fix the SNMP counter of UDP_MIB_INDATAGRAMS If UDP echo is sent to xinetd/echo-dgram, the UDP reply will be received at the sender. But the SNMP counter of UDP_MIB_INDATAGRAMS will be not increased, UDP6_MIB_INDATAGRAMS will be increased instead. Endpoint A Endpoint B UDP Echo request -----------> (IPv4, Dst port=7) <---------- UDP Echo Reply (IPv4, Src port=7) This bug is come from this patch cb75994ec311b2cd50e5205efdcc0696abd6675d. It do counter UDP[6]_MIB_INDATAGRAMS until udp[v6]_recvmsg. Because xinetd used IPv6 socket to receive UDP messages, thus, when received UDP packet, the UDP6_MIB_INDATAGRAMS will be increased in function udpv6_recvmsg() even if the packet is a IPv4 UDP packet. This patch fixed the problem. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv6/udp.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 71e259e866a1..18696af106d6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -138,6 +138,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, int peeked; int err; int is_udplite = IS_UDPLITE(sk); + int is_udp4; if (addr_len) *addr_len=sizeof(struct sockaddr_in6); @@ -158,6 +159,8 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; + is_udp4 = (skb->protocol == htons(ETH_P_IP)); + /* * If checksum is needed at all, try to do it while copying the * data. If the data is truncated, or if we only want a partial @@ -180,9 +183,14 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, if (err) goto out_free; - if (!peeked) - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_INDATAGRAMS, is_udplite); + if (!peeked) { + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); + } sock_recv_timestamp(msg, sk, skb); @@ -196,7 +204,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = 0; - if (skb->protocol == htons(ETH_P_IP)) + if (is_udp4) ipv6_addr_set(&sin6->sin6_addr, 0, 0, htonl(0xffff), ip_hdr(skb)->saddr); else { @@ -207,7 +215,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, } } - if (skb->protocol == htons(ETH_P_IP)) { + if (is_udp4) { if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); } else { From 0856f93958c488f0cc656be53c26dfd20663bdb3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 2 Nov 2008 16:14:27 +0000 Subject: [PATCH 022/173] udp: Fix the SNMP counter of UDP_MIB_INERRORS UDP packets received in udpv6_recvmsg() are not only IPv6 UDP packets, but also have IPv4 UDP packets, so when do the counter of UDP_MIB_INERRORS in udpv6_recvmsg(), we should check whether the packet is a IPv6 UDP packet or a IPv4 UDP packet. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv6/udp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 18696af106d6..8b48512ebf6a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -236,8 +236,14 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, csum_copy_err: lock_sock(sk); - if (!skb_kill_datagram(sk, skb, flags)) - UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + if (!skb_kill_datagram(sk, skb, flags)) { + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + } release_sock(sk); if (flags & MSG_DONTWAIT) From 219df32faec97349516c29f33008fea59a46e99a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Nov 2008 08:17:05 +0100 Subject: [PATCH 023/173] ALSA: rawmidi - Add open check in rawmidi callbacks The drivers (e.g. mtpav) may call rawmidi functions in irq handlers even though the streams are not opened. This results in Oops or panic. This patch adds the rawmidi state check before actually operating the rawmidi buffers. Tested-by: Ingo Molnar Signed-off-by: Takashi Iwai --- sound/core/rawmidi.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index c4995c9f5730..39672f68ce5d 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -148,6 +148,8 @@ static int snd_rawmidi_runtime_free(struct snd_rawmidi_substream *substream) static inline void snd_rawmidi_output_trigger(struct snd_rawmidi_substream *substream,int up) { + if (!substream->opened) + return; if (up) { tasklet_hi_schedule(&substream->runtime->tasklet); } else { @@ -158,6 +160,8 @@ static inline void snd_rawmidi_output_trigger(struct snd_rawmidi_substream *subs static void snd_rawmidi_input_trigger(struct snd_rawmidi_substream *substream, int up) { + if (!substream->opened) + return; substream->ops->trigger(substream, up); if (!up && substream->runtime->event) tasklet_kill(&substream->runtime->tasklet); @@ -857,6 +861,8 @@ int snd_rawmidi_receive(struct snd_rawmidi_substream *substream, int result = 0, count1; struct snd_rawmidi_runtime *runtime = substream->runtime; + if (!substream->opened) + return -EBADFD; if (runtime->buffer == NULL) { snd_printd("snd_rawmidi_receive: input is not active!!!\n"); return -EINVAL; @@ -1126,6 +1132,8 @@ int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count) { + if (!substream->opened) + return -EBADFD; count = snd_rawmidi_transmit_peek(substream, buffer, count); if (count < 0) return count; From b02555c3845f02924b8224ff1fd9a44f2c144dbb Mon Sep 17 00:00:00 2001 From: Zoltan Devai Date: Mon, 3 Nov 2008 00:30:28 +0100 Subject: [PATCH 024/173] ALSA: Fix PIT lockup on some chipsets when using the PC-Speaker Fix PIT lockup on some chipsets when using the PC-Speaker. Signed-off-by: Zoltan Devai Signed-off-by: Takashi Iwai --- sound/drivers/pcsp/pcsp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/drivers/pcsp/pcsp_input.c b/sound/drivers/pcsp/pcsp_input.c index cd9b83e7f7d1..0444cdeb4bec 100644 --- a/sound/drivers/pcsp/pcsp_input.c +++ b/sound/drivers/pcsp/pcsp_input.c @@ -24,13 +24,13 @@ static void pcspkr_do_sound(unsigned int count) spin_lock_irqsave(&i8253_lock, flags); if (count) { - /* enable counter 2 */ - outb_p(inb_p(0x61) | 3, 0x61); /* set command for counter 2, 2 byte write */ outb_p(0xB6, 0x43); /* select desired HZ */ outb_p(count & 0xff, 0x42); outb((count >> 8) & 0xff, 0x42); + /* enable counter 2 */ + outb_p(inb_p(0x61) | 3, 0x61); } else { /* disable counter 2 */ outb(inb_p(0x61) & 0xFC, 0x61); From bb072bf0980abbe4fc905df91857a8ad34ca673c Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 2 Nov 2008 03:50:35 +0100 Subject: [PATCH 025/173] sound: struct device - replace bus_id with dev_name(), dev_set_name() [stripped sound/isa/* changes, replaced with the next patch -- tiwai] Signed-off-by: Kay Sievers Signed-off-by: Takashi Iwai --- sound/aoa/soundbus/core.c | 2 +- sound/drivers/ml403-ac97cr.c | 4 ++-- sound/soc/soc-core.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/aoa/soundbus/core.c b/sound/aoa/soundbus/core.c index f84f3e505788..fa8ab2815a98 100644 --- a/sound/aoa/soundbus/core.c +++ b/sound/aoa/soundbus/core.c @@ -176,7 +176,7 @@ int soundbus_add_one(struct soundbus_dev *dev) return -EINVAL; } - snprintf(dev->ofdev.dev.bus_id, BUS_ID_SIZE, "soundbus:%x", ++devcount); + dev_set_name(&dev->ofdev.dev, "soundbus:%x", ++devcount); dev->ofdev.dev.bus = &soundbus_bus_type; return of_device_register(&dev->ofdev); } diff --git a/sound/drivers/ml403-ac97cr.c b/sound/drivers/ml403-ac97cr.c index ecdbeb6d3603..7783843ca9ae 100644 --- a/sound/drivers/ml403-ac97cr.c +++ b/sound/drivers/ml403-ac97cr.c @@ -1153,7 +1153,7 @@ snd_ml403_ac97cr_create(struct snd_card *card, struct platform_device *pfdev, /* get irq */ irq = platform_get_irq(pfdev, 0); if (request_irq(irq, snd_ml403_ac97cr_irq, IRQF_DISABLED, - pfdev->dev.bus_id, (void *)ml403_ac97cr)) { + dev_name(&pfdev->dev), (void *)ml403_ac97cr)) { snd_printk(KERN_ERR SND_ML403_AC97CR_DRIVER ": " "unable to grab IRQ %d\n", irq); @@ -1166,7 +1166,7 @@ snd_ml403_ac97cr_create(struct snd_card *card, struct platform_device *pfdev, ml403_ac97cr->irq); irq = platform_get_irq(pfdev, 1); if (request_irq(irq, snd_ml403_ac97cr_irq, IRQF_DISABLED, - pfdev->dev.bus_id, (void *)ml403_ac97cr)) { + dev_name(&pfdev->dev), (void *)ml403_ac97cr)) { snd_printk(KERN_ERR SND_ML403_AC97CR_DRIVER ": " "unable to grab IRQ %d\n", irq); diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 462e635dfc74..0af3bbde5bbe 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -95,8 +95,8 @@ static int soc_ac97_dev_register(struct snd_soc_codec *codec) codec->ac97->dev.parent = NULL; codec->ac97->dev.release = soc_ac97_device_release; - snprintf(codec->ac97->dev.bus_id, BUS_ID_SIZE, "%d-%d:%s", - codec->card->number, 0, codec->name); + dev_set_name(&codec->ac97->dev, "%d-%d:%s", + codec->card->number, 0, codec->name); err = device_register(&codec->ac97->dev); if (err < 0) { snd_printk(KERN_ERR "Can't register ac97 bus\n"); From 0418ff0c8e48aae4e5e0cbcd3e19a057bea2e55a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Nov 2008 08:51:33 +0100 Subject: [PATCH 026/173] ALSA: remove direct access of dev->bus_id in sound/isa/* Removed the direct accesses of dev->bus_id in sound/isa/* by replacement with dev_err() or dev_warn() functions. Signed-off-by: Takashi Iwai --- sound/isa/ad1848/ad1848.c | 6 +++--- sound/isa/adlib.c | 12 ++++++------ sound/isa/cs423x/cs4231.c | 8 ++++---- sound/isa/cs423x/cs4236.c | 8 ++++---- sound/isa/es1688/es1688.c | 9 +++------ sound/isa/gus/gusclassic.c | 13 +++++-------- sound/isa/gus/gusextreme.c | 19 +++++++------------ sound/isa/sb/sb8.c | 4 ++-- 8 files changed, 34 insertions(+), 45 deletions(-) diff --git a/sound/isa/ad1848/ad1848.c b/sound/isa/ad1848/ad1848.c index b68d20edc20f..223a6c038819 100644 --- a/sound/isa/ad1848/ad1848.c +++ b/sound/isa/ad1848/ad1848.c @@ -70,15 +70,15 @@ static int __devinit snd_ad1848_match(struct device *dev, unsigned int n) return 0; if (port[n] == SNDRV_AUTO_PORT) { - snd_printk(KERN_ERR "%s: please specify port\n", dev->bus_id); + dev_err(dev, "please specify port\n"); return 0; } if (irq[n] == SNDRV_AUTO_IRQ) { - snd_printk(KERN_ERR "%s: please specify irq\n", dev->bus_id); + dev_err(dev, "please specify irq\n"); return 0; } if (dma1[n] == SNDRV_AUTO_DMA) { - snd_printk(KERN_ERR "%s: please specify dma1\n", dev->bus_id); + dev_err(dev, "please specify dma1\n"); return 0; } return 1; diff --git a/sound/isa/adlib.c b/sound/isa/adlib.c index efa8c80d05b6..374b7177e111 100644 --- a/sound/isa/adlib.c +++ b/sound/isa/adlib.c @@ -36,7 +36,7 @@ static int __devinit snd_adlib_match(struct device *dev, unsigned int n) return 0; if (port[n] == SNDRV_AUTO_PORT) { - snd_printk(KERN_ERR "%s: please specify port\n", dev->bus_id); + dev_err(dev, "please specify port\n"); return 0; } return 1; @@ -55,13 +55,13 @@ static int __devinit snd_adlib_probe(struct device *dev, unsigned int n) card = snd_card_new(index[n], id[n], THIS_MODULE, 0); if (!card) { - snd_printk(KERN_ERR "%s: could not create card\n", dev->bus_id); + dev_err(dev, "could not create card\n"); return -EINVAL; } card->private_data = request_region(port[n], 4, CRD_NAME); if (!card->private_data) { - snd_printk(KERN_ERR "%s: could not grab ports\n", dev->bus_id); + dev_err(dev, "could not grab ports\n"); error = -EBUSY; goto out; } @@ -73,13 +73,13 @@ static int __devinit snd_adlib_probe(struct device *dev, unsigned int n) error = snd_opl3_create(card, port[n], port[n] + 2, OPL3_HW_AUTO, 1, &opl3); if (error < 0) { - snd_printk(KERN_ERR "%s: could not create OPL\n", dev->bus_id); + dev_err(dev, "could not create OPL\n"); goto out; } error = snd_opl3_hwdep_new(opl3, 0, 0, NULL); if (error < 0) { - snd_printk(KERN_ERR "%s: could not create FM\n", dev->bus_id); + dev_err(dev, "could not create FM\n"); goto out; } @@ -87,7 +87,7 @@ static int __devinit snd_adlib_probe(struct device *dev, unsigned int n) error = snd_card_register(card); if (error < 0) { - snd_printk(KERN_ERR "%s: could not register card\n", dev->bus_id); + dev_err(dev, "could not register card\n"); goto out; } diff --git a/sound/isa/cs423x/cs4231.c b/sound/isa/cs423x/cs4231.c index ddd289120aa8..f019d449e2d6 100644 --- a/sound/isa/cs423x/cs4231.c +++ b/sound/isa/cs423x/cs4231.c @@ -74,15 +74,15 @@ static int __devinit snd_cs4231_match(struct device *dev, unsigned int n) return 0; if (port[n] == SNDRV_AUTO_PORT) { - snd_printk(KERN_ERR "%s: please specify port\n", dev->bus_id); + dev_err(dev, "please specify port\n"); return 0; } if (irq[n] == SNDRV_AUTO_IRQ) { - snd_printk(KERN_ERR "%s: please specify irq\n", dev->bus_id); + dev_err(dev, "please specify irq\n"); return 0; } if (dma1[n] == SNDRV_AUTO_DMA) { - snd_printk(KERN_ERR "%s: please specify dma1\n", dev->bus_id); + dev_err(dev, "please specify dma1\n"); return 0; } return 1; @@ -133,7 +133,7 @@ static int __devinit snd_cs4231_probe(struct device *dev, unsigned int n) mpu_port[n], 0, mpu_irq[n], mpu_irq[n] >= 0 ? IRQF_DISABLED : 0, NULL) < 0) - printk(KERN_WARNING "%s: MPU401 not detected\n", dev->bus_id); + dev_warn(dev, "MPU401 not detected\n"); } snd_card_set_dev(card, dev); diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c index 91f9c15d3e30..019c9401663e 100644 --- a/sound/isa/cs423x/cs4236.c +++ b/sound/isa/cs423x/cs4236.c @@ -488,19 +488,19 @@ static int __devinit snd_cs423x_isa_match(struct device *pdev, return 0; if (port[dev] == SNDRV_AUTO_PORT) { - snd_printk(KERN_ERR "%s: please specify port\n", pdev->bus_id); + dev_err(pdev, "please specify port\n"); return 0; } if (cport[dev] == SNDRV_AUTO_PORT) { - snd_printk(KERN_ERR "%s: please specify cport\n", pdev->bus_id); + dev_err(pdev, "please specify cport\n"); return 0; } if (irq[dev] == SNDRV_AUTO_IRQ) { - snd_printk(KERN_ERR "%s: please specify irq\n", pdev->bus_id); + dev_err(pdev, "please specify irq\n"); return 0; } if (dma1[dev] == SNDRV_AUTO_DMA) { - snd_printk(KERN_ERR "%s: please specify dma1\n", pdev->bus_id); + dev_err(pdev, "please specify dma1\n"); return 0; } return 1; diff --git a/sound/isa/es1688/es1688.c b/sound/isa/es1688/es1688.c index f88639ea64b2..b46377139cf8 100644 --- a/sound/isa/es1688/es1688.c +++ b/sound/isa/es1688/es1688.c @@ -88,16 +88,14 @@ static int __devinit snd_es1688_legacy_create(struct snd_card *card, if (irq[n] == SNDRV_AUTO_IRQ) { irq[n] = snd_legacy_find_free_irq(possible_irqs); if (irq[n] < 0) { - snd_printk(KERN_ERR "%s: unable to find a free IRQ\n", - dev->bus_id); + dev_err(dev, "unable to find a free IRQ\n"); return -EBUSY; } } if (dma8[n] == SNDRV_AUTO_DMA) { dma8[n] = snd_legacy_find_free_dma(possible_dmas); if (dma8[n] < 0) { - snd_printk(KERN_ERR "%s: unable to find a free DMA\n", - dev->bus_id); + dev_err(dev, "unable to find a free DMA\n"); return -EBUSY; } } @@ -147,8 +145,7 @@ static int __devinit snd_es1688_probe(struct device *dev, unsigned int n) if (snd_opl3_create(card, chip->port, chip->port + 2, OPL3_HW_OPL3, 0, &opl3) < 0) - printk(KERN_WARNING "%s: opl3 not detected at 0x%lx\n", - dev->bus_id, chip->port); + dev_warn(dev, "opl3 not detected at 0x%lx\n", chip->port); else { error = snd_opl3_hwdep_new(opl3, 0, 1, NULL); if (error < 0) diff --git a/sound/isa/gus/gusclassic.c b/sound/isa/gus/gusclassic.c index 8f914b37bf89..426532a4d730 100644 --- a/sound/isa/gus/gusclassic.c +++ b/sound/isa/gus/gusclassic.c @@ -90,24 +90,21 @@ static int __devinit snd_gusclassic_create(struct snd_card *card, if (irq[n] == SNDRV_AUTO_IRQ) { irq[n] = snd_legacy_find_free_irq(possible_irqs); if (irq[n] < 0) { - snd_printk(KERN_ERR "%s: unable to find a free IRQ\n", - dev->bus_id); + dev_err(dev, "unable to find a free IRQ\n"); return -EBUSY; } } if (dma1[n] == SNDRV_AUTO_DMA) { dma1[n] = snd_legacy_find_free_dma(possible_dmas); if (dma1[n] < 0) { - snd_printk(KERN_ERR "%s: unable to find a free DMA1\n", - dev->bus_id); + dev_err(dev, "unable to find a free DMA1\n"); return -EBUSY; } } if (dma2[n] == SNDRV_AUTO_DMA) { dma2[n] = snd_legacy_find_free_dma(possible_dmas); if (dma2[n] < 0) { - snd_printk(KERN_ERR "%s: unable to find a free DMA2\n", - dev->bus_id); + dev_err(dev, "unable to find a free DMA2\n"); return -EBUSY; } } @@ -174,8 +171,8 @@ static int __devinit snd_gusclassic_probe(struct device *dev, unsigned int n) error = -ENODEV; if (gus->max_flag || gus->ess_flag) { - snd_printk(KERN_ERR "%s: GUS Classic or ACE soundcard was " - "not detected at 0x%lx\n", dev->bus_id, gus->gf1.port); + dev_err(dev, "GUS Classic or ACE soundcard was " + "not detected at 0x%lx\n", gus->gf1.port); goto out; } diff --git a/sound/isa/gus/gusextreme.c b/sound/isa/gus/gusextreme.c index da13185eb0a0..7ad4c3b41a84 100644 --- a/sound/isa/gus/gusextreme.c +++ b/sound/isa/gus/gusextreme.c @@ -106,16 +106,14 @@ static int __devinit snd_gusextreme_es1688_create(struct snd_card *card, if (irq[n] == SNDRV_AUTO_IRQ) { irq[n] = snd_legacy_find_free_irq(possible_irqs); if (irq[n] < 0) { - snd_printk(KERN_ERR "%s: unable to find a free IRQ " - "for ES1688\n", dev->bus_id); + dev_err(dev, "unable to find a free IRQ for ES1688\n"); return -EBUSY; } } if (dma8[n] == SNDRV_AUTO_DMA) { dma8[n] = snd_legacy_find_free_dma(possible_dmas); if (dma8[n] < 0) { - snd_printk(KERN_ERR "%s: unable to find a free DMA " - "for ES1688\n", dev->bus_id); + dev_err(dev, "unable to find a free DMA for ES1688\n"); return -EBUSY; } } @@ -143,16 +141,14 @@ static int __devinit snd_gusextreme_gus_card_create(struct snd_card *card, if (gf1_irq[n] == SNDRV_AUTO_IRQ) { gf1_irq[n] = snd_legacy_find_free_irq(possible_irqs); if (gf1_irq[n] < 0) { - snd_printk(KERN_ERR "%s: unable to find a free IRQ " - "for GF1\n", dev->bus_id); + dev_err(dev, "unable to find a free IRQ for GF1\n"); return -EBUSY; } } if (dma1[n] == SNDRV_AUTO_DMA) { dma1[n] = snd_legacy_find_free_dma(possible_dmas); if (dma1[n] < 0) { - snd_printk(KERN_ERR "%s: unable to find a free DMA " - "for GF1\n", dev->bus_id); + dev_err(dev, "unable to find a free DMA for GF1\n"); return -EBUSY; } } @@ -278,8 +274,8 @@ static int __devinit snd_gusextreme_probe(struct device *dev, unsigned int n) error = -ENODEV; if (!gus->ess_flag) { - snd_printk(KERN_ERR "%s: GUS Extreme soundcard was not " - "detected at 0x%lx\n", dev->bus_id, gus->gf1.port); + dev_err(dev, "GUS Extreme soundcard was not " + "detected at 0x%lx\n", gus->gf1.port); goto out; } gus->codec_flag = 1; @@ -310,8 +306,7 @@ static int __devinit snd_gusextreme_probe(struct device *dev, unsigned int n) if (snd_opl3_create(card, es1688->port, es1688->port + 2, OPL3_HW_OPL3, 0, &opl3) < 0) - printk(KERN_ERR "%s: opl3 not detected at 0x%lx\n", - dev->bus_id, es1688->port); + dev_warn(dev, "opl3 not detected at 0x%lx\n", es1688->port); else { error = snd_opl3_hwdep_new(opl3, 0, 2, NULL); if (error < 0) diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c index 336a34277907..667eccc676a4 100644 --- a/sound/isa/sb/sb8.c +++ b/sound/isa/sb/sb8.c @@ -85,11 +85,11 @@ static int __devinit snd_sb8_match(struct device *pdev, unsigned int dev) if (!enable[dev]) return 0; if (irq[dev] == SNDRV_AUTO_IRQ) { - snd_printk(KERN_ERR "%s: please specify irq\n", pdev->bus_id); + dev_err(pdev, "please specify irq\n"); return 0; } if (dma8[dev] == SNDRV_AUTO_DMA) { - snd_printk(KERN_ERR "%s: please specify dma8\n", pdev->bus_id); + dev_err(pdev, "please specify dma8\n"); return 0; } return 1; From 55c8eb6c8eaa5009eed1557b296da5d4ea9c369a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 3 Nov 2008 00:04:24 -0800 Subject: [PATCH 027/173] SMC91x: Fix compilation on some platforms. This reverts 51ac3beffd4afaea4350526cf01fe74aaff25eff ('SMC91x: delete unused local variable "lp"') and adds __maybe_unused markers to these (potentially) unused variables. The issue is that in some configurations SMC_IO_SHIFT evaluates to '(lp->io_shift)', but in some others it's plain '0'. Based upon a build failure report from Manuel Lauss. Signed-off-by: David S. Miller --- drivers/net/smc91x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c index 6f9895d4e5bd..fc80f250da31 100644 --- a/drivers/net/smc91x.c +++ b/drivers/net/smc91x.c @@ -2060,6 +2060,7 @@ static int smc_request_attrib(struct platform_device *pdev, struct net_device *ndev) { struct resource * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-attrib"); + struct smc_local *lp __maybe_unused = netdev_priv(ndev); if (!res) return 0; @@ -2074,6 +2075,7 @@ static void smc_release_attrib(struct platform_device *pdev, struct net_device *ndev) { struct resource * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-attrib"); + struct smc_local *lp __maybe_unused = netdev_priv(ndev); if (res) release_mem_region(res->start, ATTRIB_SIZE); From 69e50282b726bab75c8050c4836dc89b7eb7bf1a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Nov 2008 10:07:43 +0100 Subject: [PATCH 028/173] ALSA: hda - Add a quirk for another Acer Aspire (1025:0090) Added a quirk for another Acer Aspier laptop (1025:0090) with ALC883 codec. Reported in Novell bnc#426935: https://bugzilla.novell.com/show_bug.cgi?id=426935 Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d327a371595c..a4666c96a44f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8414,6 +8414,7 @@ static const char *alc883_models[ALC883_MODEL_LAST] = { static struct snd_pci_quirk alc883_cfg_tbl[] = { SND_PCI_QUIRK(0x1019, 0x6668, "ECS", ALC883_3ST_6ch_DIG), SND_PCI_QUIRK(0x1025, 0x006c, "Acer Aspire 9810", ALC883_ACER_ASPIRE), + SND_PCI_QUIRK(0x1025, 0x0090, "Acer Aspire", ALC883_ACER_ASPIRE), SND_PCI_QUIRK(0x1025, 0x0110, "Acer Aspire", ALC883_ACER_ASPIRE), SND_PCI_QUIRK(0x1025, 0x0112, "Acer Aspire 9303", ALC883_ACER_ASPIRE), SND_PCI_QUIRK(0x1025, 0x0121, "Acer Aspire 5920G", ALC883_ACER_ASPIRE), From c2c80529460095035752bf0ecc1af82c1e0f6e0f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 31 Oct 2008 19:50:41 +0000 Subject: [PATCH 029/173] tracing, alpha: undefined reference to `save_stack_trace' Impact: build fix on !stacktrace architectures only select STACKTRACE on architectures that have STACKTRACE_SUPPORT ... since we also need to ifdef out the guts of ftrace_trace_stack(). We also want to disallow setting TRACE_ITER_STACKTRACE in trace_flags on such configs, but that can wait. Signed-off-by: Al Viro Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 2 +- kernel/trace/trace.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index b58f43bec363..33dbefd471e8 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -25,7 +25,7 @@ config TRACING bool select DEBUG_FS select RING_BUFFER - select STACKTRACE + select STACKTRACE if STACKTRACE_SUPPORT select TRACEPOINTS select NOP_TRACER diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8a499e2adaec..85bee775a03e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -705,6 +705,7 @@ static void ftrace_trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { +#ifdef CONFIG_STACKTRACE struct ring_buffer_event *event; struct stack_entry *entry; struct stack_trace trace; @@ -730,6 +731,7 @@ static void ftrace_trace_stack(struct trace_array *tr, save_stack_trace(&trace); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +#endif } void __trace_stack(struct trace_array *tr, From 55e03a68d2489d116a5c5e8111ecef3f69831ed6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Nov 2008 10:21:36 +0100 Subject: [PATCH 030/173] ALSA: emu10k1 - Add more invert_shared_spdif flag to Audigy models Reported in Novell bnc#440862: https://bugzilla.novell.com/show_bug.cgi?id=440862 Signed-off-by: Takashi Iwai --- sound/pci/emu10k1/emu10k1_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 2f283ea6ad9a..de5ee8f097f6 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -1464,6 +1464,7 @@ static struct snd_emu_chip_details emu_chip_details[] = { .ca0151_chip = 1, .spk71 = 1, .spdif_bug = 1, + .invert_shared_spdif = 1, /* digital/analog switch swapped */ .ac97_chip = 1} , {.vendor = 0x1102, .device = 0x0004, .subsystem = 0x20021102, .driver = "Audigy2", .name = "Audigy 2 ZS [SB0350]", @@ -1473,6 +1474,7 @@ static struct snd_emu_chip_details emu_chip_details[] = { .ca0151_chip = 1, .spk71 = 1, .spdif_bug = 1, + .invert_shared_spdif = 1, /* digital/analog switch swapped */ .ac97_chip = 1} , {.vendor = 0x1102, .device = 0x0004, .subsystem = 0x20011102, .driver = "Audigy2", .name = "Audigy 2 ZS [2001]", @@ -1482,6 +1484,7 @@ static struct snd_emu_chip_details emu_chip_details[] = { .ca0151_chip = 1, .spk71 = 1, .spdif_bug = 1, + .invert_shared_spdif = 1, /* digital/analog switch swapped */ .ac97_chip = 1} , /* Audigy 2 */ /* Tested by James@superbug.co.uk 3rd July 2005 */ From a1caa32295d67284ecba18cd8db692c7166f0706 Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard Date: Mon, 3 Nov 2008 01:30:23 -0800 Subject: [PATCH 031/173] XFRM: copy_to_user_kmaddress() reports local address twice While adding support for MIGRATE/KMADDRESS in strongSwan (as specified in draft-ebalard-mext-pfkey-enhanced-migrate-00), Andreas Steffen noticed that XFRMA_KMADDRESS attribute passed to userland contains the local address twice (remote provides local address instead of remote one). This bug in copy_to_user_kmaddress() affects only key managers that use native XFRM interface (key managers that use PF_KEY are not affected). For the record, the bug was in the initial changeset I posted which added support for KMADDRESS (13c1d18931ebb5cf407cb348ef2cd6284d68902d 'xfrm: MIGRATE enhancements (draft-ebalard-mext-pfkey-enhanced-migrate)'). Signed-off-by: Arnaud Ebalard Reported-by: Andreas Steffen Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4a8a1abb59ee..a278a6f3b991 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1816,7 +1816,7 @@ static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb) uk.family = k->family; uk.reserved = k->reserved; memcpy(&uk.local, &k->local, sizeof(uk.local)); - memcpy(&uk.remote, &k->local, sizeof(uk.remote)); + memcpy(&uk.remote, &k->remote, sizeof(uk.remote)); return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); } From b3aa557722b3d5858f14ca559e03461c24125aaf Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 31 Oct 2008 15:44:07 -0400 Subject: [PATCH 032/173] ftrace: use kretprobe trampoline name to test in output Impact: ia64+tracing build fix When a function is kprobed, the return address is set to the kprobe_trampoline, or something similar. This caused the output of the trace to look confusing when the parent seemed to be this "kprobe_trampoline" function. To fix this, Abhishek Sagar added a test of the instruction pointer of the parent to see if it matched the kprobe_trampoline. If it did, the output would print a "[unknown/kretprobe'd]" instead. Unfortunately, not all archs do this the same way, and the trampoline function may not be exported, which causes failures in builds. This patch will compare the name instead of the pointer to see if it matches. This prevents us from depending on a function from being exported, and should work on all archs. The worst that can happen is that an arch might use a different name and then we go back to the confusing output. At least the arch will still build. Reported-by: Abhishek Sagar Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Tested-by: Abhishek Sagar Acked-by: Abhishek Sagar --- kernel/trace/trace.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 85bee775a03e..9f3b478f9171 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1088,17 +1088,20 @@ static void s_stop(struct seq_file *m, void *p) mutex_unlock(&trace_types_lock); } -#define KRETPROBE_MSG "[unknown/kretprobe'd]" - #ifdef CONFIG_KRETPROBES -static inline int kretprobed(unsigned long addr) +static inline const char *kretprobed(const char *name) { - return addr == (unsigned long)kretprobe_trampoline; + static const char tramp_name[] = "kretprobe_trampoline"; + int size = sizeof(tramp_name); + + if (strncmp(tramp_name, name, size) == 0) + return "[unknown/kretprobe'd]"; + return name; } #else -static inline int kretprobed(unsigned long addr) +static inline const char *kretprobed(const char *name) { - return 0; + return name; } #endif /* CONFIG_KRETPROBES */ @@ -1107,10 +1110,13 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) { #ifdef CONFIG_KALLSYMS char str[KSYM_SYMBOL_LEN]; + const char *name; kallsyms_lookup(address, NULL, NULL, NULL, str); - return trace_seq_printf(s, fmt, str); + name = kretprobed(str); + + return trace_seq_printf(s, fmt, name); #endif return 1; } @@ -1121,9 +1127,12 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, { #ifdef CONFIG_KALLSYMS char str[KSYM_SYMBOL_LEN]; + const char *name; sprint_symbol(str, address); - return trace_seq_printf(s, fmt, str); + name = kretprobed(str); + + return trace_seq_printf(s, fmt, name); #endif return 1; } @@ -1377,10 +1386,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) seq_print_ip_sym(s, field->ip, sym_flags); trace_seq_puts(s, " ("); - if (kretprobed(field->parent_ip)) - trace_seq_puts(s, KRETPROBE_MSG); - else - seq_print_ip_sym(s, field->parent_ip, sym_flags); + seq_print_ip_sym(s, field->parent_ip, sym_flags); trace_seq_puts(s, ")\n"); break; } @@ -1496,12 +1502,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) ret = trace_seq_printf(s, " <-"); if (!ret) return TRACE_TYPE_PARTIAL_LINE; - if (kretprobed(field->parent_ip)) - ret = trace_seq_puts(s, KRETPROBE_MSG); - else - ret = seq_print_ip_sym(s, - field->parent_ip, - sym_flags); + ret = seq_print_ip_sym(s, + field->parent_ip, + sym_flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } From 73557af5bf32c3db973050de1fb73423e8fc873e Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 31 Oct 2008 13:59:49 -0400 Subject: [PATCH 033/173] x86, voyager: fix smp_intr_init() compile breakage Impact: fix x86/Voyager build Looks like this became static on the rest of x86. Fix it up by adding an external definition to mach-voyager/setup.c Signed-off-by: Ingo Molnar --- arch/x86/include/asm/voyager.h | 1 + arch/x86/mach-voyager/setup.c | 2 +- arch/x86/mach-voyager/voyager_smp.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h index 9c811d2e6f91..b3e647307625 100644 --- a/arch/x86/include/asm/voyager.h +++ b/arch/x86/include/asm/voyager.h @@ -520,6 +520,7 @@ extern void voyager_restart(void); extern void voyager_cat_power_off(void); extern void voyager_cat_do_common_interrupt(void); extern void voyager_handle_nmi(void); +extern void voyager_smp_intr_init(void); /* Commands for the following are */ #define VOYAGER_PSI_READ 0 #define VOYAGER_PSI_WRITE 1 diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index 6bbdd633864c..a580b9562e76 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -27,7 +27,7 @@ static struct irqaction irq2 = { void __init intr_init_hook(void) { #ifdef CONFIG_SMP - smp_intr_init(); + voyager_smp_intr_init(); #endif setup_irq(2, &irq2); diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 7f4c6af14351..0e331652681e 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -1258,7 +1258,7 @@ static void handle_vic_irq(unsigned int irq, struct irq_desc *desc) #define QIC_SET_GATE(cpi, vector) \ set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector)) -void __init smp_intr_init(void) +void __init voyager_smp_intr_init(void) { int i; From 818e3dd30a4ff34fff6d90e87ae59c73f6a53691 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 31 Oct 2008 09:58:35 -0400 Subject: [PATCH 034/173] tracing, ring-buffer: add paranoid checks for loops While writing a new tracer, I had a bug where I caused the ring-buffer to recurse in a bad way. The bug was with the tracer I was writing and not the ring-buffer itself. But it took a long time to find the problem. This patch adds paranoid checks into the ring-buffer infrastructure that will catch bugs of this nature. Note: I put the bug back in the tracer and this patch showed the error nicely and prevented the lockup. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 56 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index cedf4e268285..3f3380638646 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1022,8 +1022,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event; u64 ts, delta; int commit = 0; + int nr_loops = 0; again: + /* + * We allow for interrupts to reenter here and do a trace. + * If one does, it will cause this original code to loop + * back here. Even with heavy interrupts happening, this + * should only happen a few times in a row. If this happens + * 1000 times in a row, there must be either an interrupt + * storm or we have something buggy. + * Bail! + */ + if (unlikely(++nr_loops > 1000)) { + RB_WARN_ON(cpu_buffer, 1); + return NULL; + } + ts = ring_buffer_time_stamp(cpu_buffer->cpu); /* @@ -1532,10 +1547,23 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) { struct buffer_page *reader = NULL; unsigned long flags; + int nr_loops = 0; spin_lock_irqsave(&cpu_buffer->lock, flags); again: + /* + * This should normally only loop twice. But because the + * start of the reader inserts an empty page, it causes + * a case where we will loop three times. There should be no + * reason to loop four times (that I know of). + */ + if (unlikely(++nr_loops > 3)) { + RB_WARN_ON(cpu_buffer, 1); + reader = NULL; + goto out; + } + reader = cpu_buffer->reader_page; /* If there's more to read, return this page */ @@ -1665,6 +1693,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; struct buffer_page *reader; + int nr_loops = 0; if (!cpu_isset(cpu, buffer->cpumask)) return NULL; @@ -1672,6 +1701,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) cpu_buffer = buffer->buffers[cpu]; again: + /* + * We repeat when a timestamp is encountered. It is possible + * to get multiple timestamps from an interrupt entering just + * as one timestamp is about to be written. The max times + * that this can happen is the number of nested interrupts we + * can have. Nesting 10 deep of interrupts is clearly + * an anomaly. + */ + if (unlikely(++nr_loops > 10)) { + RB_WARN_ON(cpu_buffer, 1); + return NULL; + } + reader = rb_get_reader_page(cpu_buffer); if (!reader) return NULL; @@ -1722,6 +1764,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) struct ring_buffer *buffer; struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; + int nr_loops = 0; if (ring_buffer_iter_empty(iter)) return NULL; @@ -1730,6 +1773,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) buffer = cpu_buffer->buffer; again: + /* + * We repeat when a timestamp is encountered. It is possible + * to get multiple timestamps from an interrupt entering just + * as one timestamp is about to be written. The max times + * that this can happen is the number of nested interrupts we + * can have. Nesting 10 deep of interrupts is clearly + * an anomaly. + */ + if (unlikely(++nr_loops > 10)) { + RB_WARN_ON(cpu_buffer, 1); + return NULL; + } + if (rb_per_cpu_empty(cpu_buffer)) return NULL; From 6b3ab21ef1ac15db4b053ce0ba8eae0ef9361c8a Mon Sep 17 00:00:00 2001 From: Matthew Ranostay Date: Mon, 3 Nov 2008 08:12:43 -0500 Subject: [PATCH 035/173] ALSA: hda: make a STAC_DELL_EQ option Add support for explicitly enabling the EQ distortion hack for systems without software biquad support. Signed-off-by: Matthew Ranostay Cc: stable@kernel.org Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_sigmatel.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index df9b0bc7f878..e6085915d86d 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -69,6 +69,7 @@ enum { enum { STAC_92HD73XX_REF, STAC_DELL_M6, + STAC_DELL_EQ, STAC_92HD73XX_MODELS }; @@ -773,9 +774,7 @@ static struct hda_verb dell_eq_core_init[] = { }; static struct hda_verb dell_m6_core_init[] = { - /* set master volume to max value without distortion - * and direct control */ - { 0x1f, AC_VERB_SET_VOLUME_KNOB_CONTROL, 0xec}, + { 0x1f, AC_VERB_SET_VOLUME_KNOB_CONTROL, 0xff}, /* setup audio connections */ { 0x0d, AC_VERB_SET_CONNECT_SEL, 0x00}, { 0x0a, AC_VERB_SET_CONNECT_SEL, 0x01}, @@ -1600,11 +1599,13 @@ static unsigned int dell_m6_pin_configs[13] = { static unsigned int *stac92hd73xx_brd_tbl[STAC_92HD73XX_MODELS] = { [STAC_92HD73XX_REF] = ref92hd73xx_pin_configs, [STAC_DELL_M6] = dell_m6_pin_configs, + [STAC_DELL_EQ] = dell_m6_pin_configs, }; static const char *stac92hd73xx_models[STAC_92HD73XX_MODELS] = { [STAC_92HD73XX_REF] = "ref", [STAC_DELL_M6] = "dell-m6", + [STAC_DELL_EQ] = "dell-eq", }; static struct snd_pci_quirk stac92hd73xx_cfg_tbl[] = { @@ -4131,12 +4132,17 @@ static int patch_stac92hd73xx(struct hda_codec *codec) sizeof(stac92hd73xx_dmux)); switch (spec->board_config) { - case STAC_DELL_M6: + case STAC_DELL_EQ: spec->init = dell_eq_core_init; + /* fallthru */ + case STAC_DELL_M6: spec->num_smuxes = 0; spec->mixer = &stac92hd73xx_6ch_mixer[DELL_M6_MIXER]; spec->amp_nids = &stac92hd73xx_amp_nids[DELL_M6_AMP]; spec->num_amps = 1; + + if (!spec->init) + spec->init = dell_m6_core_init; switch (codec->subsystem_id) { case 0x1028025e: /* Analog Mics */ case 0x1028025f: @@ -4146,8 +4152,6 @@ static int patch_stac92hd73xx(struct hda_codec *codec) break; case 0x10280271: /* Digital Mics */ case 0x10280272: - spec->init = dell_m6_core_init; - /* fall-through */ case 0x10280254: case 0x10280255: stac92xx_set_config_reg(codec, 0x13, 0x90A60160); From 8d5c6603c408d91ecf543f244f10ccb8b500ad95 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 3 Nov 2008 18:21:44 +0100 Subject: [PATCH 036/173] io mapping: improve documentation Impact: add documentation Signed-off-by: Keith Packard Signed-off-by: Ingo Molnar --- Documentation/io-mapping.txt | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Documentation/io-mapping.txt b/Documentation/io-mapping.txt index cd2f726becc8..473e43b2d588 100644 --- a/Documentation/io-mapping.txt +++ b/Documentation/io-mapping.txt @@ -71,6 +71,12 @@ range, creating a permanent kernel-visible mapping to the resource. The map_atomic and map functions add the requested offset to the base of the virtual address returned by ioremap_wc. -On 32-bit processors, io_mapping_map_atomic_wc uses io_map_atomic_prot_pfn, -which uses the fixmaps to get us a mapping to a page using an atomic fashion. -For io_mapping_map_wc, ioremap_wc() is used to get a mapping of the region. +On 32-bit processors with HIGHMEM defined, io_mapping_map_atomic_wc uses +kmap_atomic_pfn to map the specified page in an atomic fashion; +kmap_atomic_pfn isn't really supposed to be used with device pages, but it +provides an efficient mapping for this usage. + +On 32-bit processors without HIGHMEM defined, io_mapping_map_atomic_wc and +io_mapping_map_wc both use ioremap_wc, a terribly inefficient function which +performs an IPI to inform all processors about the new mapping. This results +in a significant performance penalty. From e5beae16901795223d677f15aa2fe192976278ee Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 3 Nov 2008 18:21:45 +0100 Subject: [PATCH 037/173] io mapping: clean up #ifdefs Impact: cleanup clean up ifdefs: change #ifdef CONFIG_X86_32/64 to CONFIG_HAVE_ATOMIC_IOMAP. flip around the #ifdef sections to clean up the structure. Signed-off-by: Keith Packard Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ++ include/linux/io-mapping.h | 89 ++++++++++++++++++++------------------ 2 files changed, 52 insertions(+), 41 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6f20718d3156..e60c59b81bdd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1894,6 +1894,10 @@ config SYSVIPC_COMPAT endmenu +config HAVE_ATOMIC_IOMAP + def_bool y + depends on X86_32 + source "net/Kconfig" source "drivers/Kconfig" diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 1b566993db6e..82df31726a54 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -33,48 +33,15 @@ /* this struct isn't actually defined anywhere */ struct io_mapping; -#ifdef CONFIG_X86_64 +#ifdef CONFIG_HAVE_ATOMIC_IOMAP -/* Create the io_mapping object*/ -static inline struct io_mapping * -io_mapping_create_wc(unsigned long base, unsigned long size) -{ - return (struct io_mapping *) ioremap_wc(base, size); -} +/* + * For small address space machines, mapping large objects + * into the kernel virtual space isn't practical. Where + * available, use fixmap support to dynamically map pages + * of the object at run time. + */ -static inline void -io_mapping_free(struct io_mapping *mapping) -{ - iounmap(mapping); -} - -/* Atomic map/unmap */ -static inline void * -io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) -{ - return ((char *) mapping) + offset; -} - -static inline void -io_mapping_unmap_atomic(void *vaddr) -{ -} - -/* Non-atomic map/unmap */ -static inline void * -io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) -{ - return ((char *) mapping) + offset; -} - -static inline void -io_mapping_unmap(void *vaddr) -{ -} - -#endif /* CONFIG_X86_64 */ - -#ifdef CONFIG_X86_32 static inline struct io_mapping * io_mapping_create_wc(unsigned long base, unsigned long size) { @@ -113,6 +80,46 @@ io_mapping_unmap(void *vaddr) { iounmap(vaddr); } -#endif /* CONFIG_X86_32 */ + +#else + +/* Create the io_mapping object*/ +static inline struct io_mapping * +io_mapping_create_wc(unsigned long base, unsigned long size) +{ + return (struct io_mapping *) ioremap_wc(base, size); +} + +static inline void +io_mapping_free(struct io_mapping *mapping) +{ + iounmap(mapping); +} + +/* Atomic map/unmap */ +static inline void * +io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) +{ + return ((char *) mapping) + offset; +} + +static inline void +io_mapping_unmap_atomic(void *vaddr) +{ +} + +/* Non-atomic map/unmap */ +static inline void * +io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) +{ + return ((char *) mapping) + offset; +} + +static inline void +io_mapping_unmap(void *vaddr) +{ +} + +#endif /* HAVE_ATOMIC_IOMAP */ #endif /* _LINUX_IO_MAPPING_H */ From ae6884a9da56f8921e432e663b4ccb4a1851b2ea Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 3 Nov 2008 14:05:08 -0500 Subject: [PATCH 038/173] cifs: fix renaming one hardlink on top of another cifs: fix renaming one hardlink on top of another POSIX says that renaming one hardlink on top of another to the same inode is a no-op. We had the logic mostly right, but forgot to clear the return code. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index d54fa8aeaea9..ff8c68de4a92 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1361,9 +1361,11 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, CIFS_MOUNT_MAP_SPECIAL_CHR); if (tmprc == 0 && (info_buf_source->UniqueId == - info_buf_target->UniqueId)) + info_buf_target->UniqueId)) { /* same file, POSIX says that this is a noop */ + rc = 0; goto cifs_rename_exit; + } } /* else ... BB we could add the same check for Windows by checking the UniqueId via FILE_INTERNAL_INFO */ From 7385d595751874854a6729fbaaa7f793480bbb67 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 2 Nov 2008 17:49:59 +0300 Subject: [PATCH 039/173] fs_enet: fix polling 1. compile fix for irqreturn_t type change 2. restore ->poll_controller after CONFIG_PPC_CPM_NEW_BINDING transition Signed-off-by: Alexey Dobriyan Signed-off-by: Jeff Garzik --- drivers/net/fs_enet/fs_enet-main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index cb51c1fb0338..a6f49d025787 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -1099,7 +1099,9 @@ static int __devinit fs_enet_probe(struct of_device *ofdev, ndev->stop = fs_enet_close; ndev->get_stats = fs_enet_get_stats; ndev->set_multicast_list = fs_set_multicast_list; - +#ifdef CONFIG_NET_POLL_CONTROLLER + ndev->poll_controller = fs_enet_netpoll; +#endif if (fpi->use_napi) netif_napi_add(ndev, &fep->napi, fs_enet_rx_napi, fpi->napi_weight); @@ -1209,7 +1211,7 @@ static void __exit fs_cleanup(void) static void fs_enet_netpoll(struct net_device *dev) { disable_irq(dev->irq); - fs_enet_interrupt(dev->irq, dev, NULL); + fs_enet_interrupt(dev->irq, dev); enable_irq(dev->irq); } #endif From 1d19ecfc65ed01bac7a58f83004057ad704ee7cc Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Sun, 2 Nov 2008 20:30:33 -0800 Subject: [PATCH 040/173] net: kconfig cleanup The bool kconfig option added to ixgbe and myri10ge for DCA is ambigous, so this patch adds a description to the kconfig option. Signed-off-by: Jeff Kirsher Signed-off-by: Jeff Garzik --- drivers/net/Kconfig | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index f749b40f954e..11f143f4adf6 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2010,9 +2010,13 @@ config IGB_LRO If in doubt, say N. config IGB_DCA - bool "Enable DCA" + bool "Direct Cache Access (DCA) Support" default y depends on IGB && DCA && !(IGB=y && DCA=m) + ---help--- + Say Y here if you want to use Direct Cache Access (DCA) in the + driver. DCA is a method for warming the CPU cache before data + is used, with the intent of lessening the impact of cache misses. source "drivers/net/ixp2000/Kconfig" @@ -2437,9 +2441,13 @@ config IXGBE will be called ixgbe. config IXGBE_DCA - bool + bool "Direct Cache Access (DCA) Support" default y depends on IXGBE && DCA && !(IXGBE=y && DCA=m) + ---help--- + Say Y here if you want to use Direct Cache Access (DCA) in the + driver. DCA is a method for warming the CPU cache before data + is used, with the intent of lessening the impact of cache misses. config IXGB tristate "Intel(R) PRO/10GbE support" @@ -2489,9 +2497,13 @@ config MYRI10GE will be called myri10ge. config MYRI10GE_DCA - bool + bool "Direct Cache Access (DCA) Support" default y depends on MYRI10GE && DCA && !(MYRI10GE=y && DCA=m) + ---help--- + Say Y here if you want to use Direct Cache Access (DCA) in the + driver. DCA is a method for warming the CPU cache before data + is used, with the intent of lessening the impact of cache misses. config NETXEN_NIC tristate "NetXen Multi port (1/10) Gigabit Ethernet NIC" From ee04448d8871e71f55520d62cf6adbf5dd403c99 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Sat, 1 Nov 2008 06:32:20 +0100 Subject: [PATCH 041/173] mv643xx_eth: fix SMI bus access timeouts The mv643xx_eth mii bus implementation uses wait_event_timeout() to wait for SMI completion interrupts. If wait_event_timeout() would return zero, mv643xx_eth would conclude that the SMI access timed out, but this is not necessarily true -- wait_event_timeout() can also return zero in the case where the SMI completion interrupt did happen in time but where it took longer than the requested timeout for the process performing the SMI access to be scheduled again. This would lead to occasional SMI access timeouts when the system would be under heavy load. The fix is to ignore the return value of wait_event_timeout(), and to re-check the SMI done bit after wait_event_timeout() returns to determine whether or not the SMI access timed out. Signed-off-by: Lennert Buytenhek Signed-off-by: Jeff Garzik --- drivers/net/mv643xx_eth.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index a9c8c08044b1..b9dcdbd369f8 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -1066,9 +1066,12 @@ static int smi_wait_ready(struct mv643xx_eth_shared_private *msp) return 0; } - if (!wait_event_timeout(msp->smi_busy_wait, smi_is_done(msp), - msecs_to_jiffies(100))) - return -ETIMEDOUT; + if (!smi_is_done(msp)) { + wait_event_timeout(msp->smi_busy_wait, smi_is_done(msp), + msecs_to_jiffies(100)); + if (!smi_is_done(msp)) + return -ETIMEDOUT; + } return 0; } From 9a0354405feb0f8bd460349a93db05e4cca8d166 Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Mon, 3 Nov 2008 16:45:55 -0800 Subject: [PATCH 042/173] bnx2x: Removing the PMF indication when unloading When the PMF flag is set, the driver can access the HW freely. When the driver is unloaded, it should not access the HW. The problem caused fatal errors when "ethtool -i" was called after the calling instance was unloaded and another instance was already loaded Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index fce745148ff9..61152e149e67 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -6481,6 +6481,7 @@ static int bnx2x_nic_load(struct bnx2x *bp, int load_mode) bnx2x_free_irq(bp); load_error: bnx2x_free_mem(bp); + bp->port.pmf = 0; /* TBD we really need to reset the chip if we want to recover from this */ @@ -6791,6 +6792,7 @@ static int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode) /* Report UNLOAD_DONE to MCP */ if (!BP_NOMCP(bp)) bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE); + bp->port.pmf = 0; /* Free SKBs, SGEs, TPA pool and driver internals */ bnx2x_free_skbs(bp); From 7d96567ac0527703cf1b80043fc0ebd7f21a10ad Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Mon, 3 Nov 2008 16:46:19 -0800 Subject: [PATCH 043/173] bnx2x: PCI configuration bug on big-endian The current code read nothing but zeros on big-endian (wrong part of the 32bits). This caused poor performance on big-endian machines. Though this issue did not cause the system to crash, the performance is significantly better with the fix so I view it as critical bug fix. Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x_init.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/bnx2x_init.h b/drivers/net/bnx2x_init.h index 130927cfc75b..a6c0b3abba29 100644 --- a/drivers/net/bnx2x_init.h +++ b/drivers/net/bnx2x_init.h @@ -564,14 +564,15 @@ static const struct arb_line write_arb_addr[NUM_WR_Q-1] = { static void bnx2x_init_pxp(struct bnx2x *bp) { + u16 devctl; int r_order, w_order; u32 val, i; pci_read_config_word(bp->pdev, - bp->pcie_cap + PCI_EXP_DEVCTL, (u16 *)&val); - DP(NETIF_MSG_HW, "read 0x%x from devctl\n", (u16)val); - w_order = ((val & PCI_EXP_DEVCTL_PAYLOAD) >> 5); - r_order = ((val & PCI_EXP_DEVCTL_READRQ) >> 12); + bp->pcie_cap + PCI_EXP_DEVCTL, &devctl); + DP(NETIF_MSG_HW, "read 0x%x from devctl\n", devctl); + w_order = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5); + r_order = ((devctl & PCI_EXP_DEVCTL_READRQ) >> 12); if (r_order > MAX_RD_ORD) { DP(NETIF_MSG_HW, "read order of %d order adjusted to %d\n", From 12b56ea89e70d4b04f2f5199750310e82894ebbd Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Mon, 3 Nov 2008 16:46:40 -0800 Subject: [PATCH 044/173] bnx2x: Calling netif_carrier_off at the end of the probe netif_carrier_off was called too early at the probe. In case of failure or simply bad timing, this can cause a fatal error since linkwatch_event might run too soon. Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index 61152e149e67..5b013d8457c5 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -10206,8 +10206,6 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, return -ENOMEM; } - netif_carrier_off(dev); - bp = netdev_priv(dev); bp->msglevel = debug; @@ -10231,6 +10229,8 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, goto init_one_exit; } + netif_carrier_off(dev); + bp->common.name = board_info[ent->driver_data].name; printk(KERN_INFO "%s: %s (%c%d) PCI-E x%d %s found at mem %lx," " IRQ %d, ", dev->name, bp->common.name, From ca8eac55fa554043c57fd18d595ca356e752833e Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Mon, 3 Nov 2008 16:46:58 -0800 Subject: [PATCH 045/173] bnx2x: Version Update Updating the version Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index 5b013d8457c5..600210d7eff9 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -59,8 +59,8 @@ #include "bnx2x.h" #include "bnx2x_init.h" -#define DRV_MODULE_VERSION "1.45.22" -#define DRV_MODULE_RELDATE "2008/09/09" +#define DRV_MODULE_VERSION "1.45.23" +#define DRV_MODULE_RELDATE "2008/11/03" #define BNX2X_BC_VER 0x040200 /* Time in jiffies before concluding the transmitter is hung */ From 19ecb6ba800765743bb4525c66562f0d30993f8d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 3 Nov 2008 17:05:16 -0800 Subject: [PATCH 046/173] niu: Use pci_ioremap_bar(). Signed-off-by: David S. Miller --- drivers/net/niu.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/niu.c b/drivers/net/niu.c index ebc812702903..9acb5d70a3ae 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -8667,7 +8667,6 @@ static void __devinit niu_device_announce(struct niu *np) static int __devinit niu_pci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { - unsigned long niureg_base, niureg_len; union niu_parent_id parent_id; struct net_device *dev; struct niu *np; @@ -8758,10 +8757,7 @@ static int __devinit niu_pci_init_one(struct pci_dev *pdev, dev->features |= (NETIF_F_SG | NETIF_F_HW_CSUM); - niureg_base = pci_resource_start(pdev, 0); - niureg_len = pci_resource_len(pdev, 0); - - np->regs = ioremap_nocache(niureg_base, niureg_len); + np->regs = pci_ioremap_bar(pdev, 0); if (!np->regs) { dev_err(&pdev->dev, PFX "Cannot map device registers, " "aborting.\n"); From bbb770e7ab9a436752babfc8765e422d7481be1f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 3 Nov 2008 19:11:29 -0800 Subject: [PATCH 047/173] xfrm: Fix xfrm_policy_gc_lock handling. From: Alexey Dobriyan Based upon a lockdep trace by Simon Arlott. xfrm_policy_kill() can be called from both BH and non-BH contexts, so we have to grab xfrm_policy_gc_lock with BH disabling. Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 25872747762c..058f04f54b90 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -315,9 +315,9 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) return; } - spin_lock(&xfrm_policy_gc_lock); + spin_lock_bh(&xfrm_policy_gc_lock); hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); - spin_unlock(&xfrm_policy_gc_lock); + spin_unlock_bh(&xfrm_policy_gc_lock); schedule_work(&xfrm_policy_gc_work); } From cadef677e4a9b9c1d069675043767df486782986 Mon Sep 17 00:00:00 2001 From: Mikael Pettersson Date: Fri, 31 Oct 2008 08:03:55 +0100 Subject: [PATCH 048/173] sata_promise: add ATA engine reset to reset ops Promise ATA engines need to be reset when errors occur. That's currently done for errors detected by sata_promise itself, but it's not done for errors like timeouts detected outside of the low-level driver. The effect of this omission is that a timeout tends to result in a sequence of failed COMRESETs after which libata EH gives up and disables the port. At that point the port's ATA engine hangs and even reloading the driver will not resume it. To fix this, make sata_promise override ->hardreset on SATA ports with code which calls pdc_reset_port() on the port in question before calling libata's hardreset. PATA ports don't use ->hardreset, so for those we override ->softreset instead. Signed-off-by: Mikael Pettersson Signed-off-by: Jeff Garzik --- drivers/ata/sata_promise.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c index 750d8cdc00cd..ba9a2570a742 100644 --- a/drivers/ata/sata_promise.c +++ b/drivers/ata/sata_promise.c @@ -153,6 +153,10 @@ static void pdc_freeze(struct ata_port *ap); static void pdc_sata_freeze(struct ata_port *ap); static void pdc_thaw(struct ata_port *ap); static void pdc_sata_thaw(struct ata_port *ap); +static int pdc_pata_softreset(struct ata_link *link, unsigned int *class, + unsigned long deadline); +static int pdc_sata_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline); static void pdc_error_handler(struct ata_port *ap); static void pdc_post_internal_cmd(struct ata_queued_cmd *qc); static int pdc_pata_cable_detect(struct ata_port *ap); @@ -186,6 +190,7 @@ static struct ata_port_operations pdc_sata_ops = { .scr_read = pdc_sata_scr_read, .scr_write = pdc_sata_scr_write, .port_start = pdc_sata_port_start, + .hardreset = pdc_sata_hardreset, }; /* First-generation chips need a more restrictive ->check_atapi_dma op */ @@ -200,6 +205,7 @@ static struct ata_port_operations pdc_pata_ops = { .freeze = pdc_freeze, .thaw = pdc_thaw, .port_start = pdc_common_port_start, + .softreset = pdc_pata_softreset, }; static const struct ata_port_info pdc_port_info[] = { @@ -693,6 +699,20 @@ static void pdc_sata_thaw(struct ata_port *ap) readl(host_mmio + hotplug_offset); /* flush */ } +static int pdc_pata_softreset(struct ata_link *link, unsigned int *class, + unsigned long deadline) +{ + pdc_reset_port(link->ap); + return ata_sff_softreset(link, class, deadline); +} + +static int pdc_sata_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline) +{ + pdc_reset_port(link->ap); + return sata_sff_hardreset(link, class, deadline); +} + static void pdc_error_handler(struct ata_port *ap) { if (!(ap->pflags & ATA_PFLAG_FROZEN)) From 554d491de112a378b4d1a705bb93b58bcd444a70 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 2 Nov 2008 22:18:52 +0100 Subject: [PATCH 049/173] sata_via: restore vt*_prepare_host error handling commit b9d5b89b487517cbd4cb4702da829e07ef9e4432 (sata_via: fix support for 5287) accidently (?) removed vt*_prepare_host error handling - restore it catched by gcc: drivers/ata/sata_via.c: In function 'svia_init_one': drivers/ata/sata_via.c:567: warning: 'host' may be used uninitialized in this function Signed-off-by: Marcin Slusarz Cc: Tejun Heo Cc: Joseph Chan Cc: Jeff Garzik Signed-off-by: Jeff Garzik --- drivers/ata/sata_via.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c index 62367fe4d5dc..c18935f0bda2 100644 --- a/drivers/ata/sata_via.c +++ b/drivers/ata/sata_via.c @@ -602,8 +602,10 @@ static int svia_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rc = vt8251_prepare_host(pdev, &host); break; default: - return -EINVAL; + rc = -EINVAL; } + if (rc) + return rc; svia_configure(pdev); From 3c324283e6cdb79210cf7975c3e40d3ba3e672b2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Nov 2008 12:37:49 +0900 Subject: [PATCH 050/173] sata_nv: fix generic, nf2/3 detection regression All three flavors of sata_nv's are different in how their hardreset behaves. * generic: Hardreset is not reliable. Link often doesn't come online after hardreset. * nf2/3: A little bit better - link comes online with longer debounce timing. However, nf2/3 can't reliable wait for the first D2H Register FIS, so it can't wait for device readiness or classify the device after hardreset. Follow-up SRST required. * ck804: Hardreset finally works. The core layer change to prefer hardreset and follow up changes exposed the above issues and caused various detection regressions for all three flavors. This patch, hopefully, fixes all the known issues and should make sata_nv error handling more reliable. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/sata_nv.c | 53 ++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index fae3841de0d8..6f1460614325 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -307,10 +307,10 @@ static int nv_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val); static void nv_nf2_freeze(struct ata_port *ap); static void nv_nf2_thaw(struct ata_port *ap); +static int nv_nf2_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline); static void nv_ck804_freeze(struct ata_port *ap); static void nv_ck804_thaw(struct ata_port *ap); -static int nv_hardreset(struct ata_link *link, unsigned int *class, - unsigned long deadline); static int nv_adma_slave_config(struct scsi_device *sdev); static int nv_adma_check_atapi_dma(struct ata_queued_cmd *qc); static void nv_adma_qc_prep(struct ata_queued_cmd *qc); @@ -405,17 +405,8 @@ static struct scsi_host_template nv_swncq_sht = { .slave_configure = nv_swncq_slave_config, }; -/* OSDL bz3352 reports that some nv controllers can't determine device - * signature reliably and nv_hardreset is implemented to work around - * the problem. This was reported on nf3 and it's unclear whether any - * other controllers are affected. However, the workaround has been - * applied to all variants and there isn't much to gain by trying to - * find out exactly which ones are affected at this point especially - * because NV has moved over to ahci for newer controllers. - */ static struct ata_port_operations nv_common_ops = { .inherits = &ata_bmdma_port_ops, - .hardreset = nv_hardreset, .scr_read = nv_scr_read, .scr_write = nv_scr_write, }; @@ -429,12 +420,22 @@ static struct ata_port_operations nv_generic_ops = { .hardreset = ATA_OP_NULL, }; +/* OSDL bz3352 reports that nf2/3 controllers can't determine device + * signature reliably. Also, the following thread reports detection + * failure on cold boot with the standard debouncing timing. + * + * http://thread.gmane.org/gmane.linux.ide/34098 + * + * Debounce with hotplug timing and request follow-up SRST. + */ static struct ata_port_operations nv_nf2_ops = { .inherits = &nv_common_ops, .freeze = nv_nf2_freeze, .thaw = nv_nf2_thaw, + .hardreset = nv_nf2_hardreset, }; +/* CK804 finally gets hardreset right */ static struct ata_port_operations nv_ck804_ops = { .inherits = &nv_common_ops, .freeze = nv_ck804_freeze, @@ -443,7 +444,7 @@ static struct ata_port_operations nv_ck804_ops = { }; static struct ata_port_operations nv_adma_ops = { - .inherits = &nv_common_ops, + .inherits = &nv_ck804_ops, .check_atapi_dma = nv_adma_check_atapi_dma, .sff_tf_read = nv_adma_tf_read, @@ -467,7 +468,7 @@ static struct ata_port_operations nv_adma_ops = { }; static struct ata_port_operations nv_swncq_ops = { - .inherits = &nv_common_ops, + .inherits = &nv_generic_ops, .qc_defer = ata_std_qc_defer, .qc_prep = nv_swncq_qc_prep, @@ -1553,6 +1554,17 @@ static void nv_nf2_thaw(struct ata_port *ap) iowrite8(mask, scr_addr + NV_INT_ENABLE); } +static int nv_nf2_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline) +{ + bool online; + int rc; + + rc = sata_link_hardreset(link, sata_deb_timing_hotplug, deadline, + &online, NULL); + return online ? -EAGAIN : rc; +} + static void nv_ck804_freeze(struct ata_port *ap) { void __iomem *mmio_base = ap->host->iomap[NV_MMIO_BAR]; @@ -1605,21 +1617,6 @@ static void nv_mcp55_thaw(struct ata_port *ap) ata_sff_thaw(ap); } -static int nv_hardreset(struct ata_link *link, unsigned int *class, - unsigned long deadline) -{ - int rc; - - /* SATA hardreset fails to retrieve proper device signature on - * some controllers. Request follow up SRST. For more info, - * see http://bugzilla.kernel.org/show_bug.cgi?id=3352 - */ - rc = sata_sff_hardreset(link, class, deadline); - if (rc) - return rc; - return -EAGAIN; -} - static void nv_adma_error_handler(struct ata_port *ap) { struct nv_adma_port_priv *pp = ap->private_data; From a464189de350b050aa8f334bd4cc53ed406e56dd Mon Sep 17 00:00:00 2001 From: Elias Oltmanns Date: Mon, 3 Nov 2008 19:01:08 +0900 Subject: [PATCH 051/173] libata: Fix a potential race condition in ata_scsi_park_show() Peter Moulder has pointed out that there is a slight chance that a negative value might be passed to jiffies_to_msecs() in ata_scsi_park_show(). This is fixed by saving the value of jiffies in a local variable, thus also reducing code since the volatile variable jiffies is accessed only once. Signed-off-by: Elias Oltmanns Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/libata-scsi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index bbb30d882f05..3fa75eac135d 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -190,7 +190,7 @@ static ssize_t ata_scsi_park_show(struct device *device, struct ata_port *ap; struct ata_link *link; struct ata_device *dev; - unsigned long flags; + unsigned long flags, now; unsigned int uninitialized_var(msecs); int rc = 0; @@ -208,10 +208,11 @@ static ssize_t ata_scsi_park_show(struct device *device, } link = dev->link; + now = jiffies; if (ap->pflags & ATA_PFLAG_EH_IN_PROGRESS && link->eh_context.unloaded_mask & (1 << dev->devno) && - time_after(dev->unpark_deadline, jiffies)) - msecs = jiffies_to_msecs(dev->unpark_deadline - jiffies); + time_after(dev->unpark_deadline, now)) + msecs = jiffies_to_msecs(dev->unpark_deadline - now); else msecs = 0; From 6a87e42e955ff27e07a77f65f8f077dc7c4171e1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Nov 2008 19:01:09 +0900 Subject: [PATCH 052/173] libata: implement ATA_HORKAGE_ATAPI_MOD16_DMA and apply it libata always uses PIO for ATAPI commands when the number of bytes to transfer isn't multiple of 16 but quantum DAT72 chokes on odd bytes PIO transfers. Implement a horkage to skip the mod16 check and apply it to the quantum device. This is reported by John Clark in the following thread. http://thread.gmane.org/gmane.linux.ide/34748 Signed-off-by: Tejun Heo Cc: John Clark Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 4 +++- include/linux/libata.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 82af7011f2dd..91b478f20557 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4024,6 +4024,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Weird ATAPI devices */ { "TORiSAN DVD-ROM DRD-N216", NULL, ATA_HORKAGE_MAX_SEC_128 }, + { "QUANTUM DAT DAT72-000", NULL, ATA_HORKAGE_ATAPI_MOD16_DMA }, /* Devices we expect to fail diagnostics */ @@ -4444,7 +4445,8 @@ int atapi_check_dma(struct ata_queued_cmd *qc) /* Don't allow DMA if it isn't multiple of 16 bytes. Quite a * few ATAPI devices choke on such DMA requests. */ - if (unlikely(qc->nbytes & 15)) + if (!(qc->dev->horkage & ATA_HORKAGE_ATAPI_MOD16_DMA) && + unlikely(qc->nbytes & 15)) return 1; if (ap->ops->check_atapi_dma) diff --git a/include/linux/libata.h b/include/linux/libata.h index f5441edee55f..c7665a4134c5 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -373,6 +373,8 @@ enum { ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ + ATA_HORKAGE_ATAPI_MOD16_DMA = (1 << 11), /* use ATAPI DMA for commands + not multiple of 16 bytes */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ From 299246f9a2a4c5c531863d72bad7ebd0de213de9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Nov 2008 19:27:07 +0900 Subject: [PATCH 053/173] libata: mask off DET when restoring SControl for detach libata restores SControl on detach; however, trying to restore non-zero DET can cause undeterministic behavior including PMP device going offline till power cycling. Mask off DET when restoring SControl. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 91b478f20557..622350d9b2e3 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5936,7 +5936,7 @@ static void ata_port_detach(struct ata_port *ap) * to us. Restore SControl and disable all existing devices. */ __ata_port_for_each_link(link, ap) { - sata_scr_write(link, SCR_CONTROL, link->saved_scontrol); + sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0); ata_link_for_each_dev(dev, link) ata_dev_disable(dev); } From 70de9a97049e0ba79dc040868564408d5ce697f9 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Mon, 3 Nov 2008 11:18:47 -0800 Subject: [PATCH 054/173] x86: don't use tsc_khz to calculate lpj if notsc is passed Impact: fix udelay when "notsc" boot parameter is passed With notsc passed on commandline, tsc may not be used for udelays, make sure that we do not use tsc_khz to calculate the lpj value in such cases. Reported-by: Bartlomiej Zolnierkiewicz Signed-off-by: Alok N Kataria Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 62348e4fd8d1..2ef80e301925 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -813,10 +813,6 @@ void __init tsc_init(void) cpu_khz = calibrate_cpu(); #endif - lpj = ((u64)tsc_khz * 1000); - do_div(lpj, HZ); - lpj_fine = lpj; - printk("Detected %lu.%03lu MHz processor.\n", (unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz % 1000); @@ -836,6 +832,10 @@ void __init tsc_init(void) /* now allow native_sched_clock() to use rdtsc */ tsc_disabled = 0; + lpj = ((u64)tsc_khz * 1000); + do_div(lpj, HZ); + lpj_fine = lpj; + use_tsc_delay(); /* Check and install the TSC clocksource */ dmi_check_system(bad_tsc_dmi_table); From e4ab1b3cbb8042f1653471c6333931134105d455 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Tue, 4 Nov 2008 12:46:03 +0000 Subject: [PATCH 055/173] x86/docs: remove noirqbalance param docs Impact: documentation fix irqbalance was removed by: commit 8b8e8c1bf7275eca859fe551dfa484134eaf013b Author: Yinghai Lu Date: Tue Aug 19 20:50:23 2008 -0700 Remove the associated documentation for noirqbalance. Signed-off-by: Mark McLoughlin Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1bbcaa8982b6..de4de3e7bc1b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1470,8 +1470,6 @@ and is between 256 and 4096 characters. It is defined in the file Valid arguments: on, off Default: on - noirqbalance [X86-32,SMP,KNL] Disable kernel irq balancing - noirqdebug [X86-32] Disables the code which attempts to detect and disable unhandled interrupt sources. From d2ed5cb80a241518dd71f467c884bfabbe15f68c Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 2 Nov 2008 09:16:50 +0000 Subject: [PATCH 056/173] [ARM] fix VFP+softfloat binaries 2.6.28-rc tightened up the ELF architecture checks on ARM. For non-EABI it only allows VFP if the hardware supports it. However, the kernel fails to also inspect the soft-float flag, so it incorrectly rejects binaries using soft-VFP. The fix is simple: also check that EF_ARM_SOFT_FLOAT isn't set before rejecting VFP binaries on non-VFP hardware. Acked-by: Mikael Pettersson Signed-off-by: Russell King --- arch/arm/kernel/elf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/elf.c b/arch/arm/kernel/elf.c index 513f332f040d..84849098c8e8 100644 --- a/arch/arm/kernel/elf.c +++ b/arch/arm/kernel/elf.c @@ -21,12 +21,16 @@ int elf_check_arch(const struct elf32_hdr *x) eflags = x->e_flags; if ((eflags & EF_ARM_EABI_MASK) == EF_ARM_EABI_UNKNOWN) { + unsigned int flt_fmt; + /* APCS26 is only allowed if the CPU supports it */ if ((eflags & EF_ARM_APCS_26) && !(elf_hwcap & HWCAP_26BIT)) return 0; + flt_fmt = eflags & (EF_ARM_VFP_FLOAT | EF_ARM_SOFT_FLOAT); + /* VFP requires the supporting code */ - if ((eflags & EF_ARM_VFP_FLOAT) && !(elf_hwcap & HWCAP_VFP)) + if (flt_fmt == EF_ARM_VFP_FLOAT && !(elf_hwcap & HWCAP_VFP)) return 0; } return 1; From 54074d59320581a6d7e4f4dd405e8cac1d174b75 Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Tue, 4 Nov 2008 21:47:07 +0800 Subject: [PATCH 057/173] drivers: remove duplicated #include Signed-off-by: Jianjun Kong Signed-off-by: Linus Torvalds --- drivers/mtd/onenand/omap2.c | 1 - drivers/net/atl1e/atl1e.h | 1 - drivers/net/ucc_geth_ethtool.c | 1 - drivers/pnp/interface.c | 1 - drivers/sbus/char/jsflash.c | 1 - drivers/staging/echo/echo.c | 1 - drivers/staging/me4000/me4000.c | 1 - drivers/xen/balloon.c | 1 - 8 files changed, 8 deletions(-) diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index 8387e05daae2..e39b21d3e168 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include diff --git a/drivers/net/atl1e/atl1e.h b/drivers/net/atl1e/atl1e.h index b645fa0f3f64..c49550d507a0 100644 --- a/drivers/net/atl1e/atl1e.h +++ b/drivers/net/atl1e/atl1e.h @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/net/ucc_geth_ethtool.c b/drivers/net/ucc_geth_ethtool.c index cfbbfee55836..85f38a6b6a49 100644 --- a/drivers/net/ucc_geth_ethtool.c +++ b/drivers/net/ucc_geth_ethtool.c @@ -37,7 +37,6 @@ #include #include #include -#include #include "ucc_geth.h" #include "ucc_geth_mii.h" diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index 478a4a739c00..c3f1c8e9d254 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c index 2bec9ccc0293..a9a9893a5f95 100644 --- a/drivers/sbus/char/jsflash.c +++ b/drivers/sbus/char/jsflash.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include diff --git a/drivers/staging/echo/echo.c b/drivers/staging/echo/echo.c index b8f2c5e9dee5..fd4007e329e7 100644 --- a/drivers/staging/echo/echo.c +++ b/drivers/staging/echo/echo.c @@ -106,7 +106,6 @@ #include /* We're doing kernel work */ #include -#include #include #include "bit_operations.h" diff --git a/drivers/staging/me4000/me4000.c b/drivers/staging/me4000/me4000.c index cf8b01bcac8d..0394e2709278 100644 --- a/drivers/staging/me4000/me4000.c +++ b/drivers/staging/me4000/me4000.c @@ -39,7 +39,6 @@ #include #include #include -#include /* Include-File for the Meilhaus ME-4000 I/O board */ #include "me4000.h" diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 8c83abc73400..a0fb5eac407c 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include From 85d7a070264272ceffec0c7ce0e9af1e37c62b6e Mon Sep 17 00:00:00 2001 From: Sanjeev Premi Date: Tue, 4 Nov 2008 13:35:06 -0800 Subject: [PATCH 058/173] ARM: OMAP: Fix compiler warnings in gpmc.c Fix these compiler warnings: gpmc.c: In function 'gpmc_init': gpmc.c:432: warning: 'return' with a value, in function returning void gpmc.c:439: warning: 'return' with a value, in function returning void Signed-off-by: Sanjeev Premi Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/gpmc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c index 763bdbeaf681..2249049c1d5a 100644 --- a/arch/arm/mach-omap2/gpmc.c +++ b/arch/arm/mach-omap2/gpmc.c @@ -429,18 +429,16 @@ void __init gpmc_init(void) gpmc_l3_clk = clk_get(NULL, ck); if (IS_ERR(gpmc_l3_clk)) { printk(KERN_ERR "Could not get GPMC clock %s\n", ck); - return -ENODEV; + BUG(); } gpmc_base = ioremap(l, SZ_4K); if (!gpmc_base) { clk_put(gpmc_l3_clk); printk(KERN_ERR "Could not get GPMC register memory\n"); - return -ENOMEM; + BUG(); } - BUG_ON(IS_ERR(gpmc_l3_clk)); - l = gpmc_read_reg(GPMC_REVISION); printk(KERN_INFO "GPMC revision %d.%d\n", (l >> 4) & 0x0f, l & 0x0f); /* Set smart idle mode and automatic L3 clock gating */ From e621f266d4cd18a07a833877c3995d2ccb35b951 Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Tue, 4 Nov 2008 13:35:07 -0800 Subject: [PATCH 059/173] ARM: OMAP: Fix debugfs_create_*'s error checking method for arm/plat-omap debugfs_create_*() returns NULL if an error occurs, returns -ENODEV when debugfs is not enabled in the kernel. Comparing to PATCH v1, because clk_debugfs_init is included in "#if defined CONFIG_DEBUG_FS", we only need to check NULL return. Thanks Li Zefan debugfs_create_u8() and other function's return value's checking method are also fixed in this patch. Signed-off-by: Zhao Lei Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/clock.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm/plat-omap/clock.c b/arch/arm/plat-omap/clock.c index bf6a10c5fc4f..be6aab9c6834 100644 --- a/arch/arm/plat-omap/clock.c +++ b/arch/arm/plat-omap/clock.c @@ -428,23 +428,23 @@ static int clk_debugfs_register_one(struct clk *c) if (c->id != 0) sprintf(p, ":%d", c->id); d = debugfs_create_dir(s, pa ? pa->dent : clk_debugfs_root); - if (IS_ERR(d)) - return PTR_ERR(d); + if (!d) + return -ENOMEM; c->dent = d; d = debugfs_create_u8("usecount", S_IRUGO, c->dent, (u8 *)&c->usecount); - if (IS_ERR(d)) { - err = PTR_ERR(d); + if (!d) { + err = -ENOMEM; goto err_out; } d = debugfs_create_u32("rate", S_IRUGO, c->dent, (u32 *)&c->rate); - if (IS_ERR(d)) { - err = PTR_ERR(d); + if (!d) { + err = -ENOMEM; goto err_out; } d = debugfs_create_x32("flags", S_IRUGO, c->dent, (u32 *)&c->flags); - if (IS_ERR(d)) { - err = PTR_ERR(d); + if (!d) { + err = -ENOMEM; goto err_out; } return 0; @@ -483,8 +483,8 @@ static int __init clk_debugfs_init(void) int err; d = debugfs_create_dir("clock", NULL); - if (IS_ERR(d)) - return PTR_ERR(d); + if (!d) + return -ENOMEM; clk_debugfs_root = d; list_for_each_entry(c, &clocks, node) { From 52414739ca3df12f6d1e78d4dc670e97af0e845f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 4 Nov 2008 13:35:07 -0800 Subject: [PATCH 060/173] ARM: OMAP: Fix get_irqnr_and_base to clear spurious interrupt bits On omap24xx, INTCPS_SIR_IRQ_OFFSET bits [6:0] contains the current active interrupt number. However, on 34xx INTCPS_SIR_IRQ_OFFSET bits [31:7] also contains the SPURIOUSIRQFLAG, which gets set if the interrupt sorting information is invalid. If the SPURIOUSIRQFLAG bits are not ignored, the interrupt code will occasionally produce a bunch of confusing errors: irq -33, desc: c02ddcc8, depth: 0, count: 0, unhandled: 0 ->handle_irq(): c006f23c, handle_bad_irq+0x0/0x22c ->chip(): 00000000, 0x0 ->action(): 00000000 Fix this by masking out only the ACTIVEIRQ bits. Also fix a confusing comment. Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/include/mach/entry-macro.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/plat-omap/include/mach/entry-macro.S b/arch/arm/plat-omap/include/mach/entry-macro.S index 030118ee204a..2276f89671d8 100644 --- a/arch/arm/plat-omap/include/mach/entry-macro.S +++ b/arch/arm/plat-omap/include/mach/entry-macro.S @@ -65,7 +65,8 @@ #include #endif -#define INTCPS_SIR_IRQ_OFFSET 0x0040 /* Active interrupt number */ +#define INTCPS_SIR_IRQ_OFFSET 0x0040 /* Active interrupt offset */ +#define ACTIVEIRQ_MASK 0x7f /* Active interrupt bits */ .macro disable_fiq .endm @@ -88,6 +89,7 @@ cmp \irqnr, #0x0 2222: ldrne \irqnr, [\base, #INTCPS_SIR_IRQ_OFFSET] + and \irqnr, \irqnr, #ACTIVEIRQ_MASK /* Clear spurious bits */ .endm From 5c32f62b97d62bec097c09e54e6602d0fce2af07 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 4 Nov 2008 13:35:08 -0800 Subject: [PATCH 061/173] ARM: OMAP: Fix define for twl4030 irqs Otherwise twl4030 gpios won't work. Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/include/mach/irqs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-omap/include/mach/irqs.h b/arch/arm/plat-omap/include/mach/irqs.h index a2929ac8c687..bed5274c910a 100644 --- a/arch/arm/plat-omap/include/mach/irqs.h +++ b/arch/arm/plat-omap/include/mach/irqs.h @@ -372,7 +372,7 @@ /* External TWL4030 gpio interrupts are optional */ #define TWL4030_GPIO_IRQ_BASE TWL4030_PWR_IRQ_END -#ifdef CONFIG_TWL4030_GPIO +#ifdef CONFIG_GPIO_TWL4030 #define TWL4030_GPIO_NR_IRQS 18 #else #define TWL4030_GPIO_NR_IRQS 0 From 79654a7698195fa043063092f5c1ca5245276fba Mon Sep 17 00:00:00 2001 From: Andreas Steffen Date: Tue, 4 Nov 2008 14:49:19 -0800 Subject: [PATCH 062/173] xfrm: Have af-specific init_tempsel() initialize family field of temporary selector While adding MIGRATE support to strongSwan, Andreas Steffen noticed that the selectors provided in XFRM_MSG_ACQUIRE have their family field uninitialized (those in MIGRATE do have their family set). Looking at the code, this is because the af-specific init_tempsel() (called via afinfo->init_tempsel() in xfrm_init_tempsel()) do not set the value. Reported-by: Andreas Steffen Acked-by: Herbert Xu Signed-off-by: Arnaud Ebalard --- net/ipv4/xfrm4_state.c | 1 + net/ipv6/xfrm6_state.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 07735ed280d7..55dc6beab9aa 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -33,6 +33,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->sel.dport_mask = htons(0xffff); x->sel.sport = xfrm_flowi_sport(fl); x->sel.sport_mask = htons(0xffff); + x->sel.family = AF_INET; x->sel.prefixlen_d = 32; x->sel.prefixlen_s = 32; x->sel.proto = fl->proto; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 89884a4f23aa..60c78cfc2737 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -34,6 +34,7 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->sel.dport_mask = htons(0xffff); x->sel.sport = xfrm_flowi_sport(fl); x->sel.sport_mask = htons(0xffff); + x->sel.family = AF_INET6; x->sel.prefixlen_d = 128; x->sel.prefixlen_s = 128; x->sel.proto = fl->proto; From 9b22ea560957de1484e6b3e8538f7eef202e3596 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 4 Nov 2008 14:49:57 -0800 Subject: [PATCH 063/173] net: fix packet socket delivery in rx irq handler The changes to deliver hardware accelerated VLAN packets to packet sockets (commit bc1d0411) caused a warning for non-NAPI drivers. The __vlan_hwaccel_rx() function is called directly from the drivers RX function, for non-NAPI drivers that means its still in RX IRQ context: [ 27.779463] ------------[ cut here ]------------ [ 27.779509] WARNING: at kernel/softirq.c:136 local_bh_enable+0x37/0x81() ... [ 27.782520] [] netif_nit_deliver+0x5b/0x75 [ 27.782590] [] __vlan_hwaccel_rx+0x79/0x162 [ 27.782664] [] atl1_intr+0x9a9/0xa7c [atl1] [ 27.782738] [] handle_IRQ_event+0x23/0x51 [ 27.782808] [] handle_edge_irq+0xc2/0x102 [ 27.782878] [] do_IRQ+0x4d/0x64 Split hardware accelerated VLAN reception into two parts to fix this: - __vlan_hwaccel_rx just stores the VLAN TCI and performs the VLAN device lookup, then calls netif_receive_skb()/netif_rx() - vlan_hwaccel_do_receive(), which is invoked by netif_receive_skb() in softirq context, performs the real reception and delivery to packet sockets. Reported-and-tested-by: Ramon Casellas Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 7 +++++++ net/8021q/vlan_core.c | 46 +++++++++++++++++++++++++++++------------ net/core/dev.c | 3 +++ 3 files changed, 43 insertions(+), 13 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 9e7b49b8062d..a5cb0c3f6dcf 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -114,6 +114,8 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); +extern int vlan_hwaccel_do_receive(struct sk_buff *skb); + #else static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) { @@ -133,6 +135,11 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, BUG(); return NET_XMIT_SUCCESS; } + +static inline int vlan_hwaccel_do_receive(struct sk_buff *skb) +{ + return 0; +} #endif /** diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 916061f681b6..68ced4bf158c 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -3,11 +3,20 @@ #include #include "vlan.h" +struct vlan_hwaccel_cb { + struct net_device *dev; +}; + +static inline struct vlan_hwaccel_cb *vlan_hwaccel_cb(struct sk_buff *skb) +{ + return (struct vlan_hwaccel_cb *)skb->cb; +} + /* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling) { - struct net_device_stats *stats; + struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb); if (skb_bond_should_drop(skb)) { dev_kfree_skb_any(skb); @@ -15,23 +24,35 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, } skb->vlan_tci = vlan_tci; + cb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); + + return (polling ? netif_receive_skb(skb) : netif_rx(skb)); +} +EXPORT_SYMBOL(__vlan_hwaccel_rx); + +int vlan_hwaccel_do_receive(struct sk_buff *skb) +{ + struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb); + struct net_device *dev = cb->dev; + struct net_device_stats *stats; + netif_nit_deliver(skb); - skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); - if (skb->dev == NULL) { - dev_kfree_skb_any(skb); - /* Not NET_RX_DROP, this is not being dropped - * due to congestion. */ - return NET_RX_SUCCESS; + if (dev == NULL) { + kfree_skb(skb); + return -1; } - skb->dev->last_rx = jiffies; + + skb->dev = dev; + skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci); skb->vlan_tci = 0; - stats = &skb->dev->stats; + dev->last_rx = jiffies; + + stats = &dev->stats; stats->rx_packets++; stats->rx_bytes += skb->len; - skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); switch (skb->pkt_type) { case PACKET_BROADCAST: break; @@ -43,13 +64,12 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, * This allows the VLAN to have a different MAC than the * underlying device, and still route correctly. */ if (!compare_ether_addr(eth_hdr(skb)->h_dest, - skb->dev->dev_addr)) + dev->dev_addr)) skb->pkt_type = PACKET_HOST; break; }; - return (polling ? netif_receive_skb(skb) : netif_rx(skb)); + return 0; } -EXPORT_SYMBOL(__vlan_hwaccel_rx); struct net_device *vlan_dev_real_dev(const struct net_device *dev) { diff --git a/net/core/dev.c b/net/core/dev.c index d9038e328cc1..9174c77d3112 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2218,6 +2218,9 @@ int netif_receive_skb(struct sk_buff *skb) int ret = NET_RX_DROP; __be16 type; + if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) + return NET_RX_SUCCESS; + /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) return NET_RX_DROP; From fce4d58353e449a1ac637fc8d2b994e0fcc55312 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 2 Nov 2008 07:26:51 +0000 Subject: [PATCH 064/173] powerpc/ps3: Fix compile error in ps3-lpm.c Compiling with CONFIG_SMP = n and CONFIG_PS3_LPM != n gives this error: drivers/ps3/ps3-lpm.c:838: error: implicit declaration of function 'get_hard_smp_processor_id' This fixes it. We have to include rather than because the UP definition of get_hard_smp_processor_id() is in , and only includes if CONFIG_SMP = y. Signed-off-by: Alexey Dobriyan Acked-by: Geoff Levand Signed-off-by: Paul Mackerras --- drivers/ps3/ps3-lpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ps3/ps3-lpm.c b/drivers/ps3/ps3-lpm.c index 85edf945ab86..204158cf7a55 100644 --- a/drivers/ps3/ps3-lpm.c +++ b/drivers/ps3/ps3-lpm.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include From 9c8b4aff18b59cd0c2d9a77b3df1f9d7077df90c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 2 Nov 2008 10:21:57 +0000 Subject: [PATCH 065/173] powerpc/cell: Fix compile error in ras.c This fixes this error on Cell when CONFIG_KEXEC = n: arch/powerpc/platforms/cell/ras.c:299: error: implicit declaration of function 'crash_shutdown_register' We have to include because it contains the dummy definition of crash_shutdown_register that is used when CONFIG_KEXEC=n, but doesn't include in that case. Signed-off-by: Alexey Dobriyan Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/ras.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c index fdf088f2430e..7b4cefa2199b 100644 --- a/arch/powerpc/platforms/cell/ras.c +++ b/arch/powerpc/platforms/cell/ras.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include From 454666eb78d890f5740ea1901f8b01a43c77c67c Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Sun, 2 Nov 2008 21:18:24 +0000 Subject: [PATCH 066/173] powerpc: Fix "unused variable" warning in pci_dlpar.c This gets rid of this build warning: arch/powerpc/platforms/pseries/pci_dlpar.c: In function 'init_phb_dynamic': arch/powerpc/platforms/pseries/pci_dlpar.c:192: warning: unused variable 'b' This is one of the very few warnings left in a ppc64_defconfig build and getting rid of it will make it easier to see future introduced ones (in fact this was introduced very recently). Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/pseries/pci_dlpar.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 31481dc485de..7190493e9bdc 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -189,7 +189,6 @@ struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn) { struct pci_controller *phb; int primary; - struct pci_bus *b; primary = list_empty(&hose_list); phb = pcibios_alloc_controller(dn); From f4b6755fb37595da3630d1d6fc130ea6888cd48f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Nov 2008 21:25:07 +0100 Subject: [PATCH 067/173] sched: cleanup fair task selection Impact: cleanup Clean up task selection Signed-off-by: Peter Zijlstra Acked-by: Mike Galbraith Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ce514afd78ff..6167336a2372 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -347,17 +347,17 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) rb_erase(&se->run_node, &cfs_rq->tasks_timeline); } -static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq) -{ - return cfs_rq->rb_leftmost; -} - static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) { - return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node); + struct rb_node *left = cfs_rq->rb_leftmost; + + if (!left) + return NULL; + + return rb_entry(left, struct sched_entity, run_node); } -static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) +static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) { struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); @@ -794,28 +794,16 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) static int wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); -static struct sched_entity * -pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) +static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) { + struct sched_entity *se = __pick_next_entity(cfs_rq); + if (!cfs_rq->next || wakeup_preempt_entity(cfs_rq->next, se) == 1) return se; return cfs_rq->next; } -static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) -{ - struct sched_entity *se = NULL; - - if (first_fair(cfs_rq)) { - se = __pick_next_entity(cfs_rq); - se = pick_next(cfs_rq, se); - set_next_entity(cfs_rq, se); - } - - return se; -} - static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) { /* @@ -1396,6 +1384,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) do { se = pick_next_entity(cfs_rq); + set_next_entity(cfs_rq, se); cfs_rq = group_cfs_rq(se); } while (cfs_rq); From d95f98d0691d3aba5e35850011946a08c9b36428 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Nov 2008 21:25:08 +0100 Subject: [PATCH 068/173] sched: fix fair preempt check Impact: fix cross-class preemption Inter-class wakeup preemptions should go on class order. Signed-off-by: Peter Zijlstra Acked-by: Mike Galbraith Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 6167336a2372..ebd6de8d17fd 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1329,6 +1329,9 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) return; } + if (unlikely(p->sched_class != &fair_sched_class)) + return; + if (unlikely(se == pse)) return; From 4793241be408b3926ee00c704d7da3b3faf3a05f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Nov 2008 21:25:09 +0100 Subject: [PATCH 069/173] sched: backward looking buddy Impact: improve/change/fix wakeup-buddy scheduling Currently we only have a forward looking buddy, that is, we prefer to schedule to the task we last woke up, under the presumption that its going to consume the data we just produced, and therefore will have cache hot benefits. This allows co-waking producer/consumer task pairs to run ahead of the pack for a little while, keeping their cache warm. Without this, we would interleave all pairs, utterly trashing the cache. This patch introduces a backward looking buddy, that is, suppose that in the above scenario, the consumer preempts the producer before it can go to sleep, we will therefore miss the wakeup from consumer to producer (its already running, after all), breaking the cycle and reverting to the cache-trashing interleaved schedule pattern. The backward buddy will try to schedule back to the task that woke us up in case the forward buddy is not available, under the assumption that the last task will be the one with the most cache hot task around barring current. This will basically allow a task to continue after it got preempted. In order to avoid starvation, we allow either buddy to get wakeup_gran ahead of the pack. Signed-off-by: Peter Zijlstra Acked-by: Mike Galbraith Signed-off-by: Ingo Molnar --- kernel/sched.c | 6 ++++-- kernel/sched_fair.c | 32 +++++++++++++++++++++++++------- kernel/sched_features.h | 1 + 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index e8819bc6f462..82cc839c9210 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -397,7 +397,7 @@ struct cfs_rq { * 'curr' points to currently running entity on this cfs_rq. * It is set to NULL otherwise (i.e when none are currently running). */ - struct sched_entity *curr, *next; + struct sched_entity *curr, *next, *last; unsigned long nr_spread_over; @@ -1805,7 +1805,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) /* * Buddy candidates are cache hot: */ - if (sched_feat(CACHE_HOT_BUDDY) && (&p->se == cfs_rq_of(&p->se)->next)) + if (sched_feat(CACHE_HOT_BUDDY) && + (&p->se == cfs_rq_of(&p->se)->next || + &p->se == cfs_rq_of(&p->se)->last)) return 1; if (p->sched_class != &fair_sched_class) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ebd6de8d17fd..a6b1db8a0bd8 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -341,9 +341,6 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) cfs_rq->rb_leftmost = next_node; } - if (cfs_rq->next == se) - cfs_rq->next = NULL; - rb_erase(&se->run_node, &cfs_rq->tasks_timeline); } @@ -741,6 +738,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) #endif } + if (cfs_rq->last == se) + cfs_rq->last = NULL; + + if (cfs_rq->next == se) + cfs_rq->next = NULL; + if (se != cfs_rq->curr) __dequeue_entity(cfs_rq, se); account_entity_dequeue(cfs_rq, se); @@ -798,10 +801,13 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) { struct sched_entity *se = __pick_next_entity(cfs_rq); - if (!cfs_rq->next || wakeup_preempt_entity(cfs_rq->next, se) == 1) - return se; + if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1) + return cfs_rq->next; - return cfs_rq->next; + if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1) + return cfs_rq->last; + + return se; } static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) @@ -1319,10 +1325,11 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) { struct task_struct *curr = rq->curr; - struct cfs_rq *cfs_rq = task_cfs_rq(curr); struct sched_entity *se = &curr->se, *pse = &p->se; if (unlikely(rt_prio(p->prio))) { + struct cfs_rq *cfs_rq = task_cfs_rq(curr); + update_rq_clock(rq); update_curr(cfs_rq); resched_task(curr); @@ -1335,6 +1342,17 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) if (unlikely(se == pse)) return; + /* + * Only set the backward buddy when the current task is still on the + * rq. This can happen when a wakeup gets interleaved with schedule on + * the ->pre_schedule() or idle_balance() point, either of which can + * drop the rq lock. + * + * Also, during early boot the idle thread is in the fair class, for + * obvious reasons its a bad idea to schedule back to the idle thread. + */ + if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) + cfs_rq_of(se)->last = se; cfs_rq_of(pse)->next = pse; /* diff --git a/kernel/sched_features.h b/kernel/sched_features.h index fda016218296..da5d93b5d2c6 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -12,3 +12,4 @@ SCHED_FEAT(LB_BIAS, 1) SCHED_FEAT(LB_WAKEUP_UPDATE, 1) SCHED_FEAT(ASYM_EFF_LOAD, 1) SCHED_FEAT(WAKEUP_OVERLAP, 0) +SCHED_FEAT(LAST_BUDDY, 1) From 02479099c286894644f8e96c6bbb535ab64662fd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Nov 2008 21:25:10 +0100 Subject: [PATCH 070/173] sched: fix buddies for group scheduling Impact: scheduling order fix for group scheduling For each level in the hierarchy, set the buddy to point to the right entity. Therefore, when we do the hierarchical schedule, we have a fair chance of ending up where we meant to. Signed-off-by: Peter Zijlstra Acked-by: Mike Galbraith Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index a6b1db8a0bd8..51aa3e102acb 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1319,6 +1319,18 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) return 0; } +static void set_last_buddy(struct sched_entity *se) +{ + for_each_sched_entity(se) + cfs_rq_of(se)->last = se; +} + +static void set_next_buddy(struct sched_entity *se) +{ + for_each_sched_entity(se) + cfs_rq_of(se)->next = se; +} + /* * Preempt the current task with a newly woken task if needed: */ @@ -1352,8 +1364,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) * obvious reasons its a bad idea to schedule back to the idle thread. */ if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) - cfs_rq_of(se)->last = se; - cfs_rq_of(pse)->next = pse; + set_last_buddy(se); + set_next_buddy(pse); /* * We can come here with TIF_NEED_RESCHED already set from new task From b22cecdd8fa4667ebab02def0866387e709927ee Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 5 Nov 2008 01:35:55 -0800 Subject: [PATCH 071/173] net/9p: fix printk format warnings Fix printk format warnings in net/9p. Built cleanly on 7 arches. net/9p/client.c:820: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:820: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:867: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:867: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:932: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:932: warning: format '%llx' expects type 'long long unsigned int', but argument 6 has type 'u64' net/9p/client.c:982: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:982: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:1025: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:1025: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 7 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 12 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 8 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 13 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 7 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 12 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 8 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 13 has type 'u64' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- net/9p/client.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index 67717f69412e..0a04faa22116 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -818,7 +818,9 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, } P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", - qid.type, qid.path, qid.version); + qid.type, + (unsigned long long)qid.path, + qid.version); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); @@ -865,7 +867,9 @@ p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname) } P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n", - qid.type, qid.path, qid.version); + qid.type, + (unsigned long long)qid.path, + qid.version); memmove(&afid->qid, &qid, sizeof(struct p9_qid)); p9_free_req(clnt, req); @@ -930,7 +934,8 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, for (count = 0; count < nwqids; count++) P9_DPRINTK(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", - count, wqids[count].type, wqids[count].path, + count, wqids[count].type, + (unsigned long long)wqids[count].path, wqids[count].version); if (nwname) @@ -980,7 +985,9 @@ int p9_client_open(struct p9_fid *fid, int mode) } P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n", - qid.type, qid.path, qid.version, iounit); + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = mode; fid->iounit = iounit; @@ -1023,7 +1030,9 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, } P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", - qid.type, qid.path, qid.version, iounit); + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = mode; fid->iounit = iounit; @@ -1230,9 +1239,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n" "<<< uid=%d gid=%d n_muid=%d\n", ret->size, ret->type, ret->dev, ret->qid.type, - ret->qid.path, ret->qid.version, ret->mode, - ret->atime, ret->mtime, ret->length, ret->name, - ret->uid, ret->gid, ret->muid, ret->extension, + (unsigned long long)ret->qid.path, ret->qid.version, ret->mode, + ret->atime, ret->mtime, (unsigned long long)ret->length, + ret->name, ret->uid, ret->gid, ret->muid, ret->extension, ret->n_uid, ret->n_gid, ret->n_muid); free_and_error: @@ -1255,9 +1264,9 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" " uid=%d gid=%d n_muid=%d\n", wst->size, wst->type, wst->dev, wst->qid.type, - wst->qid.path, wst->qid.version, wst->mode, - wst->atime, wst->mtime, wst->length, wst->name, - wst->uid, wst->gid, wst->muid, wst->extension, + (unsigned long long)wst->qid.path, wst->qid.version, wst->mode, + wst->atime, wst->mtime, (unsigned long long)wst->length, + wst->name, wst->uid, wst->gid, wst->muid, wst->extension, wst->n_uid, wst->n_gid, wst->n_muid); err = 0; clnt = fid->clnt; From e3ec6cfc260e2322834e200c2fa349cdf104fd13 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 5 Nov 2008 01:43:57 -0800 Subject: [PATCH 072/173] ipv6: fix run pending DAD when interface becomes ready With some net devices types, an IPv6 address configured while the interface was down can stay 'tentative' forever, even after the interface is set up. In some case, pending IPv6 DADs are not executed when the device becomes ready. I observed this while doing some tests with kvm. If I assign an IPv6 address to my interface eth0 (kvm driver rtl8139) when it is still down then the address is flagged tentative (IFA_F_TENTATIVE). Then, I set eth0 up, and to my surprise, the address stays 'tentative', no DAD is executed and the address can't be pinged. I also observed the same behaviour, without kvm, with virtual interfaces types macvlan and veth. Some easy steps to reproduce the issue with macvlan: 1. ip link add link eth0 type macvlan 2. ip -6 addr add 2003::ab32/64 dev macvlan0 3. ip addr show dev macvlan0 ... inet6 2003::ab32/64 scope global tentative ... 4. ip link set macvlan0 up 5. ip addr show dev macvlan0 ... inet6 2003::ab32/64 scope global tentative ... Address is still tentative I think there's a bug in net/ipv6/addrconf.c, addrconf_notify(): addrconf_dad_run() is not always run when the interface is flagged IF_READY. Currently it is only run when receiving NETDEV_CHANGE event. Looks like some (virtual) devices doesn't send this event when becoming up. For both NETDEV_UP and NETDEV_CHANGE events, when the interface becomes ready, run_pending should be set to 1. Patch below. 'run_pending = 1' could be moved below the if/else block but it makes the code less readable. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index eea9542728ca..d9da5eb9dcb2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2483,8 +2483,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (!idev && dev->mtu >= IPV6_MIN_MTU) idev = ipv6_add_dev(dev); - if (idev) + if (idev) { idev->if_flags |= IF_READY; + run_pending = 1; + } } else { if (!addrconf_qdisc_ok(dev)) { /* device is still not ready. */ From efb9a8c28ca0edd9e2572117105ebad9bbc0c368 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 5 Nov 2008 03:03:18 -0800 Subject: [PATCH 073/173] netfilter: netns ct: walk netns list under RTNL netns list (just list) is under RTNL. But helper and proto unregistration happen during rmmod when RTNL is not held, and that's how it was tested: modprobe/rmmod vs clone(CLONE_NEWNET)/exit. BUG: unable to handle kernel paging request at 0000000000100100 <=== IP: [] nf_conntrack_l4proto_unregister+0x96/0xae [nf_conntrack] PGD 15e300067 PUD 15e1d8067 PMD 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC last sysfs file: /sys/kernel/uevent_seqnum CPU 0 Modules linked in: nf_conntrack_proto_sctp(-) nf_conntrack_proto_dccp(-) af_packet iptable_nat nf_nat nf_conntrack_ipv4 nf_conntrack nf_defrag_ipv4 iptable_filter ip_tables xt_tcpudp ip6table_filter ip6_tables x_tables ipv6 sr_mod cdrom [last unloaded: nf_conntrack_proto_sctp] Pid: 16758, comm: rmmod Not tainted 2.6.28-rc2-netns-xfrm #3 RIP: 0010:[] [] nf_conntrack_l4proto_unregister+0x96/0xae [nf_conntrack] RSP: 0018:ffff88015dc1fec8 EFLAGS: 00010212 RAX: 0000000000000000 RBX: 00000000001000f8 RCX: 0000000000000000 RDX: ffffffffa009575c RSI: 0000000000000003 RDI: ffffffffa00956b5 RBP: ffff88015dc1fed8 R08: 0000000000000002 R09: 0000000000000000 R10: 0000000000000000 R11: ffff88015dc1fe48 R12: ffffffffa0458f60 R13: 0000000000000880 R14: 00007fff4c361d30 R15: 0000000000000880 FS: 00007f624435a6f0(0000) GS:ffffffff80521580(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000100100 CR3: 0000000168969000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process rmmod (pid: 16758, threadinfo ffff88015dc1e000, task ffff880179864218) Stack: ffffffffa0459100 0000000000000000 ffff88015dc1fee8 ffffffffa0457934 ffff88015dc1ff78 ffffffff80253fef 746e6e6f635f666e 6f72705f6b636172 00707463735f6f74 ffffffff8024cb30 00000000023b8010 0000000000000000 Call Trace: [] nf_conntrack_proto_sctp_fini+0x10/0x1e [nf_conntrack_proto_sctp] [] sys_delete_module+0x19f/0x1fe [] ? trace_hardirqs_on_caller+0xf0/0x114 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b Code: 13 35 e0 e8 c4 6c 1a e0 48 8b 1d 6d c6 46 e0 eb 16 48 89 df 4c 89 e2 48 c7 c6 fc 85 09 a0 e8 61 cd ff ff 48 8b 5b 08 48 83 eb 08 <48> 8b 43 08 0f 18 08 48 8d 43 08 48 3d 60 4f 50 80 75 d3 5b 41 RIP [] nf_conntrack_l4proto_unregister+0x96/0xae [nf_conntrack] RSP CR2: 0000000000100100 ---[ end trace bde8ac82debf7192 ]--- Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_helper.c | 3 +++ net/netfilter/nf_conntrack_proto.c | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 9c06b9f86ad4..c39b6a994133 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -167,10 +168,12 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) */ synchronize_rcu(); + rtnl_lock(); spin_lock_bh(&nf_conntrack_lock); for_each_net(net) __nf_conntrack_helper_unregister(me, net); spin_unlock_bh(&nf_conntrack_lock); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index a59a307e685d..592d73344d46 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -221,8 +222,10 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ + rtnl_lock(); for_each_net(net) nf_ct_iterate_cleanup(net, kill_l3proto, proto); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); @@ -333,8 +336,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ + rtnl_lock(); for_each_net(net) nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); From 518a09ef11f8454f4676125d47c3e775b300c6a5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 5 Nov 2008 03:36:01 -0800 Subject: [PATCH 074/173] tcp: Fix recvmsg MSG_PEEK influence of blocking behavior. Vito Caputo noticed that tcp_recvmsg() returns immediately from partial reads when MSG_PEEK is used. In particular, this means that SO_RCVLOWAT is not respected. Simply remove the test. And this matches the behavior of several other systems, including BSD. Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index eccb7165a80c..c5aca0bb116a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1374,8 +1374,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || - signal_pending(current) || - (flags & MSG_PEEK)) + signal_pending(current)) break; } else { if (sock_flag(sk, SOCK_DONE)) From 467622ef2acb01986eab37ef96c3632b3ea35999 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 1 Nov 2008 04:19:11 -0700 Subject: [PATCH 075/173] [MTD] [NOR] Fix cfi_send_gen_cmd handling of x16 devices in x8 mode (v4) For "unlock" cycles to 16bit devices in 8bit compatibility mode we need to use the byte addresses 0xaaa and 0x555. These effectively match the word address 0x555 and 0x2aa, except the latter has its low bit set. Most chips don't care about the value of the 'A-1' pin in x8 mode, but some -- like the ST M29W320D -- do. So we need to be careful to set it where appropriate. cfi_send_gen_cmd is only ever passed addresses where the low byte is 0x00, 0x55 or 0xaa. Of those, only addresses ending 0xaa are affected by this patch, by masking in the extra low bit when the device is known to be in compatibility mode. [dwmw2: Do it only when (cmd_ofs & 0xff) == 0xaa] v4: Fix stupid typo in cfi_build_cmd_addr that failed to compile I'm writing this patch way to late at night. v3: Bring all of the work back into cfi_build_cmd_addr including calling of map_bankwidth(map) and cfi_interleave(cfi) So every caller doesn't need to. v2: Only modified the address if we our device_type is larger than our bus width. Cc: stable@kernel.org Signed-off-by: Eric W. Biederman Signed-off-by: David Woodhouse --- drivers/mtd/chips/cfi_cmdset_0002.c | 13 ------------- drivers/mtd/chips/jedec_probe.c | 10 ++++------ include/linux/mtd/cfi.h | 22 +++++++++++++++++++--- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 3e6f5d8609e8..d74ec46aa032 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -406,19 +406,6 @@ struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary) /* Set the default CFI lock/unlock addresses */ cfi->addr_unlock1 = 0x555; cfi->addr_unlock2 = 0x2aa; - /* Modify the unlock address if we are in compatibility mode */ - if ( /* x16 in x8 mode */ - ((cfi->device_type == CFI_DEVICETYPE_X8) && - (cfi->cfiq->InterfaceDesc == - CFI_INTERFACE_X8_BY_X16_ASYNC)) || - /* x32 in x16 mode */ - ((cfi->device_type == CFI_DEVICETYPE_X16) && - (cfi->cfiq->InterfaceDesc == - CFI_INTERFACE_X16_BY_X32_ASYNC))) - { - cfi->addr_unlock1 = 0xaaa; - cfi->addr_unlock2 = 0x555; - } } /* CFI mode */ else if (cfi->cfi_mode == CFI_MODE_JEDEC) { diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c index f84ab6182148..2f3f2f719ba4 100644 --- a/drivers/mtd/chips/jedec_probe.c +++ b/drivers/mtd/chips/jedec_probe.c @@ -1808,9 +1808,7 @@ static inline u32 jedec_read_mfr(struct map_info *map, uint32_t base, * several first banks can contain 0x7f instead of actual ID */ do { - uint32_t ofs = cfi_build_cmd_addr(0 + (bank << 8), - cfi_interleave(cfi), - cfi->device_type); + uint32_t ofs = cfi_build_cmd_addr(0 + (bank << 8), map, cfi); mask = (1 << (cfi->device_type * 8)) - 1; result = map_read(map, base + ofs); bank++; @@ -1824,7 +1822,7 @@ static inline u32 jedec_read_id(struct map_info *map, uint32_t base, { map_word result; unsigned long mask; - u32 ofs = cfi_build_cmd_addr(1, cfi_interleave(cfi), cfi->device_type); + u32 ofs = cfi_build_cmd_addr(1, map, cfi); mask = (1 << (cfi->device_type * 8)) -1; result = map_read(map, base + ofs); return result.x[0] & mask; @@ -2067,8 +2065,8 @@ static int jedec_probe_chip(struct map_info *map, __u32 base, } /* Ensure the unlock addresses we try stay inside the map */ - probe_offset1 = cfi_build_cmd_addr(cfi->addr_unlock1, cfi_interleave(cfi), cfi->device_type); - probe_offset2 = cfi_build_cmd_addr(cfi->addr_unlock2, cfi_interleave(cfi), cfi->device_type); + probe_offset1 = cfi_build_cmd_addr(cfi->addr_unlock1, map, cfi); + probe_offset2 = cfi_build_cmd_addr(cfi->addr_unlock2, map, cfi); if ( ((base + probe_offset1 + map_bankwidth(map)) >= map->size) || ((base + probe_offset2 + map_bankwidth(map)) >= map->size)) goto retry; diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index ee5124ec319e..00e2b575021f 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -282,9 +282,25 @@ struct cfi_private { /* * Returns the command address according to the given geometry. */ -static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, int interleave, int type) +static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, + struct map_info *map, struct cfi_private *cfi) { - return (cmd_ofs * type) * interleave; + unsigned bankwidth = map_bankwidth(map); + unsigned interleave = cfi_interleave(cfi); + unsigned type = cfi->device_type; + uint32_t addr; + + addr = (cmd_ofs * type) * interleave; + + /* Modify the unlock address if we are in compatiblity mode. + * For 16bit devices on 8 bit busses + * and 32bit devices on 16 bit busses + * set the low bit of the alternating bit sequence of the address. + */ + if (((type * interleave) > bankwidth) && ((uint8_t)cmd_ofs == 0xaa)) + addr |= (type >> 1)*interleave; + + return addr; } /* @@ -430,7 +446,7 @@ static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t int type, map_word *prev_val) { map_word val; - uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, cfi_interleave(cfi), type); + uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, map, cfi); val = cfi_build_cmd(cmd, map, cfi); if (prev_val) From 9fcd18c9e63e325dbd2b4c726623f760788d5aa8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 5 Nov 2008 16:52:08 +0100 Subject: [PATCH 076/173] sched: re-tune balancing Impact: improve wakeup affinity on NUMA systems, tweak SMP systems Given the fixes+tweaks to the wakeup-buddy code, re-tweak the domain balancing defaults on NUMA and SMP systems. Turn on SD_WAKE_AFFINE which was off on x86 NUMA - there's no reason why we would not want to have wakeup affinity across nodes as well. (we already do this in the standard NUMA template.) lat_ctx on a NUMA box is particularly happy about this change: before: | phoenix:~/l> ./lat_ctx -s 0 2 | "size=0k ovr=2.60 | 2 5.70 after: | phoenix:~/l> ./lat_ctx -s 0 2 | "size=0k ovr=2.65 | 2 2.07 a 2.75x speedup. pipe-test is similarly happy about it too: | phoenix:~/sched-tests> ./pipe-test | 18.26 usecs/loop. | 14.70 usecs/loop. | 14.38 usecs/loop. | 10.55 usecs/loop. # +WAKE_AFFINE on domain0+domain1 | 8.63 usecs/loop. | 8.59 usecs/loop. | 9.03 usecs/loop. | 8.94 usecs/loop. | 8.96 usecs/loop. | 8.63 usecs/loop. Also: - disable SD_BALANCE_NEWIDLE on NUMA and SMP domains (keep it for siblings) - enable SD_WAKE_BALANCE on SMP domains Sysbench+postgresql improves all around the board, quite significantly: .28-rc3-11474e2c .28-rc3-11474e2c-tune ------------------------------------------------- 1: 571 688 +17.08% 2: 1236 1206 -2.55% 4: 2381 2642 +9.89% 8: 4958 5164 +3.99% 16: 9580 9574 -0.07% 32: 7128 8118 +12.20% 64: 7342 8266 +11.18% 128: 7342 8064 +8.95% 256: 7519 7884 +4.62% 512: 7350 7731 +4.93% ------------------------------------------------- SUM: 55412 59341 +6.62% So it's a win both for the runup portion, the peak area and the tail. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 7 ++++--- include/linux/topology.h | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 90ac7718469a..4850e4b02b61 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -154,7 +154,7 @@ extern unsigned long node_remap_size[]; #endif -/* sched_domains SD_NODE_INIT for NUMAQ machines */ +/* sched_domains SD_NODE_INIT for NUMA machines */ #define SD_NODE_INIT (struct sched_domain) { \ .min_interval = 8, \ .max_interval = 32, \ @@ -169,8 +169,9 @@ extern unsigned long node_remap_size[]; .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ - | SD_SERIALIZE \ - | SD_WAKE_BALANCE, \ + | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 1, \ } diff --git a/include/linux/topology.h b/include/linux/topology.h index 2158fc0d5a56..34a7ee0ebed2 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -146,10 +146,10 @@ void arch_update_cpu_topology(void); .wake_idx = 1, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ - | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ + | SD_BALANCE_FORK \ | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE \ | BALANCE_FOR_PKG_POWER,\ .last_balance = jiffies, \ .balance_interval = 1, \ From 517ac45af4b55913587279d89001171c222f22e7 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Thu, 23 Oct 2008 16:30:13 -0500 Subject: [PATCH 077/173] 9p: rdma: Set trans prior to requesting async connection ops The RDMA connection manager is fundamentally asynchronous. Since the async callback context is the client pointer, the transport in the client struct needs to be set prior to calling the first async op. Signed-off-by: Tom Tucker Signed-off-by: Eric Van Hensbergen --- net/9p/trans_rdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 8d6cc4777aae..4e9d2e673cf4 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -589,6 +589,9 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) if (IS_ERR(rdma->cm_id)) goto error; + /* Associate the client with the transport */ + client->trans = rdma; + /* Resolve the server's address */ rdma->addr.sin_family = AF_INET; rdma->addr.sin_addr.s_addr = in_aton(addr); @@ -669,7 +672,6 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) if (err || (rdma->state != P9_RDMA_CONNECTED)) goto error; - client->trans = rdma; client->status = Connected; return 0; From cac23d6505546f4cfa42d949ec46d431a44bd39c Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Thu, 23 Oct 2008 16:31:02 -0500 Subject: [PATCH 078/173] 9p: Make all client spin locks IRQ safe The client lock must be IRQ safe. Some of the lock acquisition paths took regular spin locks. Signed-off-by: Tom Tucker Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index 67717f69412e..f4e6c05b3c68 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -613,6 +613,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) { int err; struct p9_fid *fid; + unsigned long flags; P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt); fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL); @@ -632,9 +633,9 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) fid->clnt = clnt; fid->aux = NULL; - spin_lock(&clnt->lock); + spin_lock_irqsave(&clnt->lock, flags); list_add(&fid->flist, &clnt->fidlist); - spin_unlock(&clnt->lock); + spin_unlock_irqrestore(&clnt->lock, flags); return fid; @@ -646,13 +647,14 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) static void p9_fid_destroy(struct p9_fid *fid) { struct p9_client *clnt; + unsigned long flags; P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid); clnt = fid->clnt; p9_idpool_put(fid->fid, clnt->fidpool); - spin_lock(&clnt->lock); + spin_lock_irqsave(&clnt->lock, flags); list_del(&fid->flist); - spin_unlock(&clnt->lock); + spin_unlock_irqrestore(&clnt->lock, flags); kfree(fid); } From 82b189eaaf6186b7694317632255fa87460820a0 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Thu, 23 Oct 2008 16:32:28 -0500 Subject: [PATCH 079/173] 9p: Remove unneeded free of fcall for Flush T and R fcall are reused until the client is destroyed. There does not need to be a special case for Flush Signed-off-by: Tom Tucker Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index f4e6c05b3c68..26ca8ab45196 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -311,12 +311,6 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r) r->status = REQ_STATUS_IDLE; if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool)) p9_idpool_put(tag, c->tagpool); - - /* if this was a flush request we have to free response fcall */ - if (r->rc->id == P9_RFLUSH) { - kfree(r->tc); - kfree(r->rc); - } } /** From 45abdf1c7be80d6ec3b0b14e59ee75a0d5d9fb37 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Thu, 23 Oct 2008 16:33:25 -0500 Subject: [PATCH 080/173] p9: Fix leak of waitqueue in request allocation path If a T or R fcall cannot be allocated, the function returns an error but neglects to free the wait queue that was successfully allocated. If it comes through again a second time this wq will be overwritten with a new allocation and the old allocation will be leaked. Also, if the client is subsequently closed, the close path will attempt to clean up these allocations, so set the req fields to NULL to avoid duplicate free. Signed-off-by: Tom Tucker Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/9p/client.c b/net/9p/client.c index 26ca8ab45196..b56d808e63a9 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -189,6 +189,9 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) printk(KERN_ERR "Couldn't grow tag array\n"); kfree(req->tc); kfree(req->rc); + kfree(req->wq); + req->tc = req->rc = NULL; + req->wq = NULL; return ERR_PTR(-ENOMEM); } req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); From 1558c6214904c636d5a37f05f84202d6cdd9cff8 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Mon, 27 Oct 2008 13:15:16 -0500 Subject: [PATCH 081/173] 9p: rdma: remove duplicated #include Removed duplicated #include in net/9p/trans_rdma.c. Signed-off-by: Huang Weiyi Signed-off-by: Eric Van Hensbergen --- net/9p/trans_rdma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 4e9d2e673cf4..2f1fe5fc1228 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -45,7 +45,6 @@ #include #include #include -#include #define P9_PORT 5640 #define P9_RDMA_SQ_DEPTH 32 From 9f3e9bbe62b0bdbbaa7c689a68a22a7d3c1670f0 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 28 Oct 2008 14:22:43 -0500 Subject: [PATCH 082/173] unsigned fid->fid cannot be negative Signed-off-by: Roel Kluin Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index b56d808e63a9..6e800dd51f09 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -608,7 +608,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) static struct p9_fid *p9_fid_create(struct p9_client *clnt) { - int err; + int ret; struct p9_fid *fid; unsigned long flags; @@ -617,11 +617,12 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) if (!fid) return ERR_PTR(-ENOMEM); - fid->fid = p9_idpool_get(clnt->fidpool); + ret = p9_idpool_get(clnt->fidpool); if (fid->fid < 0) { - err = -ENOSPC; + ret = -ENOSPC; goto error; } + fid->fid = ret; memset(&fid->qid, 0, sizeof(struct p9_qid)); fid->mode = -1; @@ -638,7 +639,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) error: kfree(fid); - return ERR_PTR(err); + return ERR_PTR(ret); } static void p9_fid_destroy(struct p9_fid *fid) From b0d5fdef521b1eadb3fc2c1283000af7ef0297bc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Nov 2008 20:46:46 -0800 Subject: [PATCH 083/173] net/9p: fix printk format warnings Fix printk format warnings in net/9p. Built cleanly on 7 arches. net/9p/client.c:820: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:820: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:867: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:867: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:932: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:932: warning: format '%llx' expects type 'long long unsigned int', but argument 6 has type 'u64' net/9p/client.c:982: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:982: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:1025: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:1025: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 7 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 12 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 8 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 13 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 7 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 12 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 8 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 13 has type 'u64' Signed-off-by: Randy Dunlap Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index 6e800dd51f09..4b529454616d 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -818,7 +818,9 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, } P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", - qid.type, qid.path, qid.version); + qid.type, + (unsigned long long)qid.path, + qid.version); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); @@ -865,7 +867,9 @@ p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname) } P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n", - qid.type, qid.path, qid.version); + qid.type, + (unsigned long long)qid.path, + qid.version); memmove(&afid->qid, &qid, sizeof(struct p9_qid)); p9_free_req(clnt, req); @@ -930,7 +934,8 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, for (count = 0; count < nwqids; count++) P9_DPRINTK(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", - count, wqids[count].type, wqids[count].path, + count, wqids[count].type, + (unsigned long long)wqids[count].path, wqids[count].version); if (nwname) @@ -980,7 +985,9 @@ int p9_client_open(struct p9_fid *fid, int mode) } P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n", - qid.type, qid.path, qid.version, iounit); + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = mode; fid->iounit = iounit; @@ -1023,7 +1030,9 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, } P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", - qid.type, qid.path, qid.version, iounit); + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = mode; fid->iounit = iounit; @@ -1230,9 +1239,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n" "<<< uid=%d gid=%d n_muid=%d\n", ret->size, ret->type, ret->dev, ret->qid.type, - ret->qid.path, ret->qid.version, ret->mode, - ret->atime, ret->mtime, ret->length, ret->name, - ret->uid, ret->gid, ret->muid, ret->extension, + (unsigned long long)ret->qid.path, ret->qid.version, ret->mode, + ret->atime, ret->mtime, (unsigned long long)ret->length, + ret->name, ret->uid, ret->gid, ret->muid, ret->extension, ret->n_uid, ret->n_gid, ret->n_muid); free_and_error: @@ -1255,9 +1264,9 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" " uid=%d gid=%d n_muid=%d\n", wst->size, wst->type, wst->dev, wst->qid.type, - wst->qid.path, wst->qid.version, wst->mode, - wst->atime, wst->mtime, wst->length, wst->name, - wst->uid, wst->gid, wst->muid, wst->extension, + (unsigned long long)wst->qid.path, wst->qid.version, wst->mode, + wst->atime, wst->mtime, (unsigned long long)wst->length, + wst->name, wst->uid, wst->gid, wst->muid, wst->extension, wst->n_uid, wst->n_gid, wst->n_muid); err = 0; clnt = fid->clnt; From dc8a0843a435b2c0891e7eaea64faaf1ebec9b11 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Nov 2008 23:21:16 +0100 Subject: [PATCH 084/173] [JFFS2] fix race condition in jffs2_lzo_compress() deflate_mutex protects the globals lzo_mem and lzo_compress_buf. However, jffs2_lzo_compress() unlocks deflate_mutex _before_ it has copied out the compressed data from lzo_compress_buf. Correct this by moving the mutex unlock after the copy. In addition, document what deflate_mutex actually protects. Cc: stable@kernel.org Signed-off-by: Geert Uytterhoeven Acked-by: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- fs/jffs2/compr_lzo.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c index 47b045797e42..90cb60d09787 100644 --- a/fs/jffs2/compr_lzo.c +++ b/fs/jffs2/compr_lzo.c @@ -19,7 +19,7 @@ static void *lzo_mem; static void *lzo_compress_buf; -static DEFINE_MUTEX(deflate_mutex); +static DEFINE_MUTEX(deflate_mutex); /* for lzo_mem and lzo_compress_buf */ static void free_workspace(void) { @@ -49,18 +49,21 @@ static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out, mutex_lock(&deflate_mutex); ret = lzo1x_1_compress(data_in, *sourcelen, lzo_compress_buf, &compress_size, lzo_mem); - mutex_unlock(&deflate_mutex); - if (ret != LZO_E_OK) - return -1; + goto fail; if (compress_size > *dstlen) - return -1; + goto fail; memcpy(cpage_out, lzo_compress_buf, compress_size); - *dstlen = compress_size; + mutex_unlock(&deflate_mutex); + *dstlen = compress_size; return 0; + + fail: + mutex_unlock(&deflate_mutex); + return -1; } static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out, From c78d0cf2925bffae8a6f00e7d9b8e971b0392edd Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 5 Nov 2008 12:04:46 +0000 Subject: [PATCH 085/173] x86: don't allow nr_irqs > NR_IRQS Impact: fix boot hang on 32-bit systems with more than 224 IO-APIC pins On some 32-bit systems with a lot of IO-APICs probe_nr_irqs() can return a value larger than NR_IRQS. This will lead to probe_irq_on() overrunning the irq_desc array. I hit this when running net-next-2.6 (close to 2.6.28-rc3) on a Supermicro dual Xeon system. NR_IRQS is 224 but probe_nr_irqs() detects 5 IOAPICs and returns 240. Here are the log messages: Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x01] address[0xfec00000] gsi_base[0]) Tue Nov 4 16:53:47 2008 IOAPIC[0]: apic_id 1, version 32, address 0xfec00000, GSI 0-23 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x02] address[0xfec81000] gsi_base[24]) Tue Nov 4 16:53:47 2008 IOAPIC[1]: apic_id 2, version 32, address 0xfec81000, GSI 24-47 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x03] address[0xfec81400] gsi_base[48]) Tue Nov 4 16:53:47 2008 IOAPIC[2]: apic_id 3, version 32, address 0xfec81400, GSI 48-71 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x04] address[0xfec82000] gsi_base[72]) Tue Nov 4 16:53:47 2008 IOAPIC[3]: apic_id 4, version 32, address 0xfec82000, GSI 72-95 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x05] address[0xfec82400] gsi_base[96]) Tue Nov 4 16:53:47 2008 IOAPIC[4]: apic_id 5, version 32, address 0xfec82400, GSI 96-119 Tue Nov 4 16:53:47 2008 ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 high edge) Tue Nov 4 16:53:47 2008 ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level) Tue Nov 4 16:53:47 2008 Enabling APIC mode: Flat. Using 5 I/O APICs Signed-off-by: Ben Hutchings Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index b764d7429c61..7a3f2028e2eb 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3611,6 +3611,8 @@ int __init probe_nr_irqs(void) /* something wrong ? */ if (nr < nr_min) nr = nr_min; + if (WARN_ON(nr > NR_IRQS)) + nr = NR_IRQS; return nr; } From 1b4897688011cd05e07f00dcfe6af3331eb36a3c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 4 Nov 2008 14:10:13 -0800 Subject: [PATCH 086/173] x86: size NR_IRQS on 32-bit systems the same way as 64-bit Impact: make NR_IRQS big enough for system with lots of apic/pins If lots of IO_APIC's are there (or can be there), size the same way as 64-bit, depending on MAX_IO_APICS and NR_CPUS. This fixes the boot problem reported by Ben Hutchings on a 32-bit server with 5 IO-APICs and 240 IO-APIC pins. Signed-off-by: Yinghai Tested-by: Ben Hutchings Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index d843ed0e9b2e..503aadc4ad35 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -101,30 +101,22 @@ #define LAST_VM86_IRQ 15 #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) -#ifdef CONFIG_X86_64 +#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_PARAVIRT) && !defined(CONFIG_X86_VISWS) && !defined(CONFIG_X86_VOYAGER) # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) # else # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif -#elif !defined(CONFIG_X86_VOYAGER) - -# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) - -# define NR_IRQS 224 - -# else /* IO_APIC || PARAVIRT */ - -# define NR_IRQS 16 - -# endif - -#else /* !VISWS && !VOYAGER */ +#elif defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) || defined(CONFIG_X86_VOYAGER) # define NR_IRQS 224 -#endif /* VISWS */ +#else /* IO_APIC || PARAVIRT */ + +# define NR_IRQS 16 + +#endif /* Voyager specific defines */ /* These define the CPIs we use in linux */ From cb3ac42b8af357fdd9ad838234245b39e5bdb7fe Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 6 Nov 2008 17:28:01 +1100 Subject: [PATCH 087/173] md: revert the recent addition of a call to the BLKRRPART ioctl. It turns out that it is only safe to call blkdev_ioctl when the device is actually open (as ->bd_disk is set to NULL on last close). And it is quite possible for do_md_stop to be called when the device is not open. So discard the call to blkdev_ioctl(BLKRRPART) which was added in commit 934d9c23b4c7e31840a895ba4b7e88d6413c81f3 It is just as easy to call this ioctl from userspace when needed (on mdadm -S) so leave it out of the kernel Signed-off-by: NeilBrown --- drivers/md/md.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 9abf6ed16535..1b1d32694f6f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3884,7 +3884,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) if (mode == 0) { mdk_rdev_t *rdev; struct list_head *tmp; - struct block_device *bdev; printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); @@ -3941,11 +3940,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) mddev->degraded = 0; mddev->barriers_work = 0; mddev->safemode = 0; - bdev = bdget_disk(mddev->gendisk, 0); - if (bdev) { - blkdev_ioctl(bdev, 0, BLKRRPART, 0); - bdput(bdev); - } kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); } else if (mddev->pers) From a53a6c85756339f82ff19e001e90cfba2d6299a8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 6 Nov 2008 17:28:20 +1100 Subject: [PATCH 088/173] md: fix bug in raid10 recovery. Adding a spare to a raid10 doesn't cause recovery to start. This is due to an silly type in commit 6c2fce2ef6b4821c21b5c42c7207cb9cf8c87eda and so is a bug in 2.6.27 and .28-rc. Thanks to Thomas Backlund for bisecting to find this. Cc: Thomas Backlund Cc: stable@kernel.org Signed-off-by: NeilBrown --- drivers/md/raid10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index da5129a24b18..970a96ef9b18 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1137,7 +1137,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) if (!enough(conf)) return -EINVAL; - if (rdev->raid_disk) + if (rdev->raid_disk >= 0) first = last = rdev->raid_disk; if (rdev->saved_raid_disk >= 0 && From da85f865b1dcec0853c48b763ed312441ce0c7df Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 5 Nov 2008 13:37:27 -0600 Subject: [PATCH 089/173] x86: mention ACPI in top-level Kconfig menu Impact: clarify menuconfig text Mention ACPI in the top-level menu to give a clue as to where it lives. This matches what ia64 does. Signed-off-by: Bjorn Helgaas Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6f20718d3156..5d6aa4013dc7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1494,7 +1494,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID def_bool X86_64 depends on NUMA -menu "Power management options" +menu "Power management and ACPI options" depends on !X86_VOYAGER config ARCH_HIBERNATION_HEADER From 43381785a5ba1cb424b36812373a6a04054b5c3c Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 20 Oct 2008 15:43:43 +0200 Subject: [PATCH 090/173] block: remove unused ll_new_mergeable() Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/blk-merge.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 8681cd6f9911..b92f5b0866b0 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -222,27 +222,6 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL(blk_rq_map_sg); -static inline int ll_new_mergeable(struct request_queue *q, - struct request *req, - struct bio *bio) -{ - int nr_phys_segs = bio_phys_segments(q, bio); - - if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { - req->cmd_flags |= REQ_NOMERGE; - if (req == q->last_merge) - q->last_merge = NULL; - return 0; - } - - /* - * A hw segment is just getting larger, bump just the phys - * counter. - */ - req->nr_phys_segments += nr_phys_segs; - return 1; -} - static inline int ll_new_hw_segment(struct request_queue *q, struct request *req, struct bio *bio) From f92131c3dd567fc6df18ce3f46fcf57ecbdefbe0 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 29 Oct 2008 14:10:51 +0100 Subject: [PATCH 091/173] bio: define __BIOVEC_PHYS_MERGEABLE Define __BIOVEC_PHYS_MERGEABLE as the default implementation of BIOVEC_PHYS_MERGEABLE, so that its available for reuse within an arch-specific definition of BIOVEC_PHYS_MERGEABLE. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/bio.h b/include/linux/bio.h index 1c91a176b9ae..6a642098e5c3 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -236,12 +236,16 @@ static inline void *bio_data(struct bio *bio) #define __BVEC_END(bio) bio_iovec_idx((bio), (bio)->bi_vcnt - 1) #define __BVEC_START(bio) bio_iovec_idx((bio), (bio)->bi_idx) +/* Default implementation of BIOVEC_PHYS_MERGEABLE */ +#define __BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ + ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) + /* * allow arch override, for eg virtualized architectures (put in asm/io.h) */ #ifndef BIOVEC_PHYS_MERGEABLE #define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) + __BIOVEC_PHYS_MERGEABLE(vec1, vec2) #endif #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ From 2920ebbd65f3e80c318adf5191ac0987142bda80 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 30 Oct 2008 08:32:29 +0100 Subject: [PATCH 092/173] block: add timer on blkdev_dequeue_request() not elv_next_request() Block queue supports two usage models - one where block driver peeks at the front of queue using elv_next_request(), processes it and finishes it and the other where block driver peeks at the front of queue, dequeue the request using blkdev_dequeue_request() and finishes it. The latter is more flexible as it allows the driver to process multiple commands concurrently. These two inconsistent usage models affect the block layer implementation confusing. For some, elv_next_request() is considered the issue point while others consider blkdev_dequeue_request() the issue point. Till now the inconsistency mostly affect only accounting, so it didn't really break anything seriously; however, with block layer timeout, this inconsistency hits hard. Block layer considers elv_next_request() the issue point and adds timer but SCSI layer thinks it was just peeking and when the request can't process the command right away, it's just left there without further processing. This makes the request dangling on the timer list and, when the timer goes off, the request which the SCSI layer and below think is still on the block queue ends up in the EH queue, causing various problems - EH hang (failed count goes over busy count and EH never wakes up), WARN_ON() and oopses as low level driver trying to handle the unknown command, etc. depending on the timing. As SCSI midlayer is the only user of block layer timer at the moment, moving blk_add_timer() to elv_dequeue_request() fixes the problem; however, this two usage models definitely need to be cleaned up in the future. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/elevator.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/block/elevator.c b/block/elevator.c index 59173a69ebdf..9ac82dde99dd 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -773,12 +773,6 @@ struct request *elv_next_request(struct request_queue *q) */ rq->cmd_flags |= REQ_STARTED; blk_add_trace_rq(q, rq, BLK_TA_ISSUE); - - /* - * We are now handing the request to the hardware, - * add the timeout handler - */ - blk_add_timer(rq); } if (!q->boundary_rq || q->boundary_rq == rq) { @@ -850,6 +844,12 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq) */ if (blk_account_rq(rq)) q->in_flight++; + + /* + * We are now handing the request to the hardware, add the + * timeout handler. + */ + blk_add_timer(rq); } EXPORT_SYMBOL(elv_dequeue_request); From e78042e5b83936b1d12a4b5bbb492bdd88ad76c6 Mon Sep 17 00:00:00 2001 From: Mike Anderson Date: Thu, 30 Oct 2008 02:16:20 -0700 Subject: [PATCH 093/173] blk: move blk_delete_timer call in end_that_request_last Move the calling blk_delete_timer to later in end_that_request_last to address an issue where blkdev_dequeue_request may have add a timer for the request. Signed-off-by: Mike Anderson Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index c3df30cfb3fc..10e8a64a5a5b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1770,8 +1770,6 @@ static void end_that_request_last(struct request *req, int error) { struct gendisk *disk = req->rq_disk; - blk_delete_timer(req); - if (blk_rq_tagged(req)) blk_queue_end_tag(req->q, req); @@ -1781,6 +1779,8 @@ static void end_that_request_last(struct request *req, int error) if (unlikely(laptop_mode) && blk_fs_request(req)) laptop_io_completion(); + blk_delete_timer(req); + /* * Account IO completion. bar_rq isn't accounted as a normal * IO on queueing nor completion. Accounting the containing From 561920a0d2bb6d63343e83acfd784c0a77bd28d1 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 30 Oct 2008 18:28:41 +0100 Subject: [PATCH 094/173] generic-ipi: fix the smp_mb() placement smp_mb() is needed (to make the memory operations visible globally) before sending the ipi on the sender and the receiver (on Alpha atleast) needs smp_read_barrier_depends() in the handler before reading the call_single_queue list in a lock-free fashion. On x86, x2apic mode register accesses for sending IPI's don't have serializing semantics. So the need for smp_mb() before sending the IPI becomes more critical in x2apic mode. Remove the unnecessary smp_mb() in csd_flag_wait(), as the presence of that smp_mb() doesn't mean anything on the sender, when the ipi receiver is not doing any thing special (like memory fence) after clearing the CSD_FLAG_WAIT. Signed-off-by: Suresh Siddha Signed-off-by: Jens Axboe --- kernel/smp.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/kernel/smp.c b/kernel/smp.c index f362a8553777..75c8dde58c55 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -51,10 +51,6 @@ static void csd_flag_wait(struct call_single_data *data) { /* Wait for response */ do { - /* - * We need to see the flags store in the IPI handler - */ - smp_mb(); if (!(data->flags & CSD_FLAG_WAIT)) break; cpu_relax(); @@ -76,6 +72,11 @@ static void generic_exec_single(int cpu, struct call_single_data *data) list_add_tail(&data->list, &dst->list); spin_unlock_irqrestore(&dst->lock, flags); + /* + * Make the list addition visible before sending the ipi. + */ + smp_mb(); + if (ipi) arch_send_call_function_single_ipi(cpu); @@ -157,7 +158,7 @@ void generic_smp_call_function_single_interrupt(void) * Need to see other stores to list head for checking whether * list is empty without holding q->lock */ - smp_mb(); + smp_read_barrier_depends(); while (!list_empty(&q->list)) { unsigned int data_flags; @@ -191,7 +192,7 @@ void generic_smp_call_function_single_interrupt(void) /* * See comment on outer loop */ - smp_mb(); + smp_read_barrier_depends(); } } @@ -370,6 +371,11 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info, list_add_tail_rcu(&data->csd.list, &call_function_queue); spin_unlock_irqrestore(&call_function_lock, flags); + /* + * Make the list addition visible before sending the ipi. + */ + smp_mb(); + /* Send a message to all CPUs in the map */ arch_send_call_function_ipi(mask); From 89f97496e81d2112b5e41416fe3020688c443818 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 5 Nov 2008 10:21:06 +0100 Subject: [PATCH 095/173] block: fix __blkdev_get() for removable devices Commit 0762b8bde9729f10f8e6249809660ff2ec3ad735 moved disk_get_part() in front of recursive get on the whole disk, which caused removable devices to try disk_get_part() before rescanning after a new media is inserted, which might fail legit open attempts or give the old partition. This patch fixes the problem by moving disk_get_part() after __blkdev_get() on the whole disk. This problem was spotted by Borislav Petkov. Signed-off-by: Tejun Heo Tested-by: Borislav Petkov Signed-off-by: Jens Axboe --- fs/block_dev.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 88a776fa0ef6..db831efbdbbd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -986,7 +986,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) { struct gendisk *disk; - struct hd_struct *part = NULL; int ret; int partno; int perm = 0; @@ -1004,24 +1003,25 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) return ret; } - ret = -ENXIO; - lock_kernel(); + ret = -ENXIO; disk = get_gendisk(bdev->bd_dev, &partno); if (!disk) goto out_unlock_kernel; - part = disk_get_part(disk, partno); - if (!part) - goto out_unlock_kernel; mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { bdev->bd_disk = disk; - bdev->bd_part = part; bdev->bd_contains = bdev; if (!partno) { struct backing_dev_info *bdi; + + ret = -ENXIO; + bdev->bd_part = disk_get_part(disk, partno); + if (!bdev->bd_part) + goto out_clear; + if (disk->fops->open) { ret = disk->fops->open(bdev, mode); if (ret) @@ -1049,18 +1049,17 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_contains = whole; bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; + bdev->bd_part = disk_get_part(disk, partno); if (!(disk->flags & GENHD_FL_UP) || - !part || !part->nr_sects) { + !bdev->bd_part || !bdev->bd_part->nr_sects) { ret = -ENXIO; goto out_clear; } - bd_set_size(bdev, (loff_t)part->nr_sects << 9); + bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); } } else { - disk_put_part(part); put_disk(disk); module_put(disk->fops->owner); - part = NULL; disk = NULL; if (bdev->bd_contains == bdev) { if (bdev->bd_disk->fops->open) { @@ -1080,6 +1079,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) return 0; out_clear: + disk_put_part(bdev->bd_part); bdev->bd_disk = NULL; bdev->bd_part = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; @@ -1091,7 +1091,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) out_unlock_kernel: unlock_kernel(); - disk_put_part(part); if (disk) module_put(disk->fops->owner); put_disk(disk); From 9c133c469d38043d5aadaa03f2fb840d88d1cf4f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 6 Nov 2008 08:42:48 +0100 Subject: [PATCH 096/173] Add round_jiffies_up and related routines This patch (as1158b) adds round_jiffies_up() and friends. These routines work like the analogous round_jiffies() functions, except that they will never round down. The new routines will be useful for timeouts where we don't care exactly when the timer expires, provided it doesn't expire too soon. Signed-off-by: Alan Stern Signed-off-by: Jens Axboe --- include/linux/timer.h | 5 ++ kernel/timer.c | 149 ++++++++++++++++++++++++++++++------------ 2 files changed, 114 insertions(+), 40 deletions(-) diff --git a/include/linux/timer.h b/include/linux/timer.h index d4ba79248a27..daf9685b861c 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -186,4 +186,9 @@ unsigned long __round_jiffies_relative(unsigned long j, int cpu); unsigned long round_jiffies(unsigned long j); unsigned long round_jiffies_relative(unsigned long j); +unsigned long __round_jiffies_up(unsigned long j, int cpu); +unsigned long __round_jiffies_up_relative(unsigned long j, int cpu); +unsigned long round_jiffies_up(unsigned long j); +unsigned long round_jiffies_up_relative(unsigned long j); + #endif diff --git a/kernel/timer.c b/kernel/timer.c index 56becf373c58..dbd50fabe4c7 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -112,6 +112,44 @@ timer_set_base(struct timer_list *timer, struct tvec_base *new_base) tbase_get_deferrable(timer->base)); } +static unsigned long round_jiffies_common(unsigned long j, int cpu, + bool force_up) +{ + int rem; + unsigned long original = j; + + /* + * We don't want all cpus firing their timers at once hitting the + * same lock or cachelines, so we skew each extra cpu with an extra + * 3 jiffies. This 3 jiffies came originally from the mm/ code which + * already did this. + * The skew is done by adding 3*cpunr, then round, then subtract this + * extra offset again. + */ + j += cpu * 3; + + rem = j % HZ; + + /* + * If the target jiffie is just after a whole second (which can happen + * due to delays of the timer irq, long irq off times etc etc) then + * we should round down to the whole second, not up. Use 1/4th second + * as cutoff for this rounding as an extreme upper bound for this. + * But never round down if @force_up is set. + */ + if (rem < HZ/4 && !force_up) /* round down */ + j = j - rem; + else /* round up */ + j = j - rem + HZ; + + /* now that we have rounded, subtract the extra skew again */ + j -= cpu * 3; + + if (j <= jiffies) /* rounding ate our timeout entirely; */ + return original; + return j; +} + /** * __round_jiffies - function to round jiffies to a full second * @j: the time in (absolute) jiffies that should be rounded @@ -134,38 +172,7 @@ timer_set_base(struct timer_list *timer, struct tvec_base *new_base) */ unsigned long __round_jiffies(unsigned long j, int cpu) { - int rem; - unsigned long original = j; - - /* - * We don't want all cpus firing their timers at once hitting the - * same lock or cachelines, so we skew each extra cpu with an extra - * 3 jiffies. This 3 jiffies came originally from the mm/ code which - * already did this. - * The skew is done by adding 3*cpunr, then round, then subtract this - * extra offset again. - */ - j += cpu * 3; - - rem = j % HZ; - - /* - * If the target jiffie is just after a whole second (which can happen - * due to delays of the timer irq, long irq off times etc etc) then - * we should round down to the whole second, not up. Use 1/4th second - * as cutoff for this rounding as an extreme upper bound for this. - */ - if (rem < HZ/4) /* round down */ - j = j - rem; - else /* round up */ - j = j - rem + HZ; - - /* now that we have rounded, subtract the extra skew again */ - j -= cpu * 3; - - if (j <= jiffies) /* rounding ate our timeout entirely; */ - return original; - return j; + return round_jiffies_common(j, cpu, false); } EXPORT_SYMBOL_GPL(__round_jiffies); @@ -191,13 +198,10 @@ EXPORT_SYMBOL_GPL(__round_jiffies); */ unsigned long __round_jiffies_relative(unsigned long j, int cpu) { - /* - * In theory the following code can skip a jiffy in case jiffies - * increments right between the addition and the later subtraction. - * However since the entire point of this function is to use approximate - * timeouts, it's entirely ok to not handle that. - */ - return __round_jiffies(j + jiffies, cpu) - jiffies; + unsigned long j0 = jiffies; + + /* Use j0 because jiffies might change while we run */ + return round_jiffies_common(j + j0, cpu, false) - j0; } EXPORT_SYMBOL_GPL(__round_jiffies_relative); @@ -218,7 +222,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative); */ unsigned long round_jiffies(unsigned long j) { - return __round_jiffies(j, raw_smp_processor_id()); + return round_jiffies_common(j, raw_smp_processor_id(), false); } EXPORT_SYMBOL_GPL(round_jiffies); @@ -243,6 +247,71 @@ unsigned long round_jiffies_relative(unsigned long j) } EXPORT_SYMBOL_GPL(round_jiffies_relative); +/** + * __round_jiffies_up - function to round jiffies up to a full second + * @j: the time in (absolute) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * This is the same as __round_jiffies() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long __round_jiffies_up(unsigned long j, int cpu) +{ + return round_jiffies_common(j, cpu, true); +} +EXPORT_SYMBOL_GPL(__round_jiffies_up); + +/** + * __round_jiffies_up_relative - function to round jiffies up to a full second + * @j: the time in (relative) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * This is the same as __round_jiffies_relative() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long __round_jiffies_up_relative(unsigned long j, int cpu) +{ + unsigned long j0 = jiffies; + + /* Use j0 because jiffies might change while we run */ + return round_jiffies_common(j + j0, cpu, true) - j0; +} +EXPORT_SYMBOL_GPL(__round_jiffies_up_relative); + +/** + * round_jiffies_up - function to round jiffies up to a full second + * @j: the time in (absolute) jiffies that should be rounded + * + * This is the same as round_jiffies() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long round_jiffies_up(unsigned long j) +{ + return round_jiffies_common(j, raw_smp_processor_id(), true); +} +EXPORT_SYMBOL_GPL(round_jiffies_up); + +/** + * round_jiffies_up_relative - function to round jiffies up to a full second + * @j: the time in (relative) jiffies that should be rounded + * + * This is the same as round_jiffies_relative() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long round_jiffies_up_relative(unsigned long j) +{ + return __round_jiffies_up_relative(j, raw_smp_processor_id()); +} +EXPORT_SYMBOL_GPL(round_jiffies_up_relative); + static inline void set_running_timer(struct tvec_base *base, struct timer_list *timer) From 7838c15b8dd18e78a523513749e5b54bda07b0cb Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 6 Nov 2008 08:42:49 +0100 Subject: [PATCH 097/173] Block: use round_jiffies_up() This patch (as1159b) changes the timeout routines in the block core to use round_jiffies_up(). There's no point in rounding the timer deadline down, since if it expires too early we will have to restart it. The patch also removes some unnecessary tests when a request is removed from the queue's timer list. Signed-off-by: Alan Stern Signed-off-by: Jens Axboe --- block/blk-timeout.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 972a63f848fb..69185ea9fae2 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -75,14 +75,7 @@ void blk_delete_timer(struct request *req) { struct request_queue *q = req->q; - /* - * Nothing to detach - */ - if (!q->rq_timed_out_fn || !req->deadline) - return; - list_del_init(&req->timeout_list); - if (list_empty(&q->timeout_list)) del_timer(&q->timeout); } @@ -142,7 +135,7 @@ void blk_rq_timed_out_timer(unsigned long data) } if (next_set && !list_empty(&q->timeout_list)) - mod_timer(&q->timeout, round_jiffies(next)); + mod_timer(&q->timeout, round_jiffies_up(next)); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -198,17 +191,10 @@ void blk_add_timer(struct request *req) /* * If the timer isn't already pending or this timeout is earlier - * than an existing one, modify the timer. Round to next nearest + * than an existing one, modify the timer. Round up to next nearest * second. */ - expiry = round_jiffies(req->deadline); - - /* - * We use ->deadline == 0 to detect whether a timer was added or - * not, so just increase to next jiffy for that specific case - */ - if (unlikely(!req->deadline)) - req->deadline = 1; + expiry = round_jiffies_up(req->deadline); if (!timer_pending(&q->timeout) || time_before(expiry, q->timeout.expires)) From 7db282fa67b58daff8a57f9e1c93d4474b5908ff Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 5 Nov 2008 23:36:48 -0800 Subject: [PATCH 098/173] x86: remove VISWS and PARAVIRT around NR_IRQS puzzle Impact: fix warning message when PARAVIRT is set in config Remove stale #ifdef components from our IRQ sizing logic. x86/Voyager is the only holdout. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 503aadc4ad35..0005adb0f941 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -101,18 +101,18 @@ #define LAST_VM86_IRQ 15 #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) -#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_PARAVIRT) && !defined(CONFIG_X86_VISWS) && !defined(CONFIG_X86_VOYAGER) +#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) # else # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif -#elif defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) || defined(CONFIG_X86_VOYAGER) +#elif defined(CONFIG_X86_VOYAGER) # define NR_IRQS 224 -#else /* IO_APIC || PARAVIRT */ +#else /* IO_APIC || VOYAGER */ # define NR_IRQS 16 From f1cd14ae52985634d0389e934eba25b5ecf24565 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 6 Nov 2008 19:41:24 +1100 Subject: [PATCH 099/173] md: linear: Fix a division by zero bug for very small arrays. We currently oops with a divide error on starting a linear software raid array consisting of at least two very small (< 500K) devices. The bug is caused by the calculation of the hash table size which tries to compute sector_div(sz, base) with "base" being zero due to the small size of the component devices of the array. Fix this by requiring the hash spacing to be at least one which implies that also "base" is non-zero. This bug has existed since about 2.6.14. Cc: stable@kernel.org Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/linear.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 190147c79e79..3b90c5c924ec 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -148,6 +148,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) min_sectors = conf->array_sectors; sector_div(min_sectors, PAGE_SIZE/sizeof(struct dev_info *)); + if (min_sectors == 0) + min_sectors = 1; /* min_sectors is the minimum spacing that will fit the hash * table in one PAGE. This may be much smaller than needed. From d6f0f39b7d05e62b347c4352d070e4afb3ade4b5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 4 Nov 2008 13:53:04 -0800 Subject: [PATCH 100/173] x86: add smp_mb() before sending INVALIDATE_TLB_VECTOR Impact: fix rare x2apic hang On x86, x2apic mode accesses for sending IPI's don't have serializing semantics. If the IPI receivner refers(in lock-free fashion) to some memory setup by the sender, the need for smp_mb() before sending the IPI becomes critical in x2apic mode. Add the smp_mb() in native_flush_tlb_others() before sending the IPI. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_32.c | 6 ++++++ arch/x86/kernel/tlb_64.c | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index e00534b33534..f4049f3513b6 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -154,6 +154,12 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, flush_mm = mm; flush_va = va; cpus_or(flush_cpumask, cpumask, flush_cpumask); + + /* + * Make the above memory operations globally visible before + * sending the IPI. + */ + smp_mb(); /* * We have to send the IPI only to * CPUs affected. diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index dcbf7a1159ea..8f919ca69494 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -182,6 +182,11 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, f->flush_va = va; cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); + /* + * Make the above memory operations globally visible before + * sending the IPI. + */ + smp_mb(); /* * We have to send the IPI only to * CPUs affected. From b954f6f63e7938a11de5bd15cb5cbcac7423cf97 Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Wed, 5 Nov 2008 22:18:41 +0200 Subject: [PATCH 101/173] [WATCHDOG] SAM9 watchdog - update for moved headers The architecture header files were recently moved from include/asm-arm/mach-at91/ to arch/arm/mach-at91/include/mach/. The SAM9 watchdog driver still includes a header from the old location. Signed-off-by: Andrew Victor Signed-off-by: Wim Van Sebroeck Signed-off-by: Andrew Morton --- drivers/watchdog/at91sam9_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c index b4babfc31586..b1da287f90ec 100644 --- a/drivers/watchdog/at91sam9_wdt.c +++ b/drivers/watchdog/at91sam9_wdt.c @@ -30,7 +30,7 @@ #include #include -#include +#include #define DRV_NAME "AT91SAM9 Watchdog" From f0e625c1aa24e861c224fb778c377b2ddb443d2b Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Wed, 5 Nov 2008 22:36:35 +0200 Subject: [PATCH 102/173] [WATCHDOG] SAM9 watchdog - supported on all SAM9 and CAP9 processors The SAM9 watchdog driver is usable on the whole family of AT91SAM9 and CAP9 processors. Update the configuration to indicate this and allow the driver to be selected. Signed-off-by: Andrew Victor Signed-off-by: Wim Van Sebroeck Signed-off-by: Andrew Morton --- drivers/watchdog/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 1a22fe782a27..4fd3fa5546b1 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -67,11 +67,11 @@ config AT91RM9200_WATCHDOG system when the timeout is reached. config AT91SAM9X_WATCHDOG - tristate "AT91SAM9X watchdog" - depends on WATCHDOG && (ARCH_AT91SAM9260 || ARCH_AT91SAM9261) + tristate "AT91SAM9X / AT91CAP9 watchdog" + depends on ARCH_AT91 && !ARCH_AT91RM9200 help - Watchdog timer embedded into AT91SAM9X chips. This will reboot your - system when the timeout is reached. + Watchdog timer embedded into AT91SAM9X and AT91CAP9 chips. This will + reboot your system when the timeout is reached. config 21285_WATCHDOG tristate "DC21285 watchdog" From 80be308dfa3798c7bad0fc81760b2faf83870e91 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 6 Nov 2008 14:59:05 +0100 Subject: [PATCH 103/173] AMD IOMMU: fix lazy IO/TLB flushing in unmap path Lazy flushing needs to take care of the unmap path too which is not yet implemented and leads to stale IO/TLB entries. This is fixed by this patch. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 38e88d40ab10..4755bbc7ae5b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -526,6 +526,9 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, { address >>= PAGE_SHIFT; iommu_area_free(dom->bitmap, address, pages); + + if (address + pages >= dom->next_bit) + dom->need_flush = true; } /**************************************************************************** @@ -981,8 +984,10 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); - if (amd_iommu_unmap_flush) + if (amd_iommu_unmap_flush || dma_dom->need_flush) { iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); + dma_dom->need_flush = false; + } } /* From b9c3bfc24e1088d260de4091b2b41808c7398355 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 6 Nov 2008 12:05:40 +0000 Subject: [PATCH 104/173] x86: align DirectMap in /proc/meminfo Impact: right-align /proc/meminfo consistent with other fields When the split-LRU patches added Inactive(anon) and Inactive(file) lines to /proc/meminfo, all counts were moved two columns rightwards to fit in. Now move x86's DirectMap lines two columns rightwards to line up. Signed-off-by: Hugh Dickins Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index f1dc1b75d166..e89d24815f26 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -67,18 +67,18 @@ static void split_page_count(int level) void arch_report_meminfo(struct seq_file *m) { - seq_printf(m, "DirectMap4k: %8lu kB\n", + seq_printf(m, "DirectMap4k: %8lu kB\n", direct_pages_count[PG_LEVEL_4K] << 2); #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) - seq_printf(m, "DirectMap2M: %8lu kB\n", + seq_printf(m, "DirectMap2M: %8lu kB\n", direct_pages_count[PG_LEVEL_2M] << 11); #else - seq_printf(m, "DirectMap4M: %8lu kB\n", + seq_printf(m, "DirectMap4M: %8lu kB\n", direct_pages_count[PG_LEVEL_2M] << 12); #endif #ifdef CONFIG_X86_64 if (direct_gbpages) - seq_printf(m, "DirectMap1G: %8lu kB\n", + seq_printf(m, "DirectMap1G: %8lu kB\n", direct_pages_count[PG_LEVEL_1G] << 20); #endif } From 8d00450d296dedec9ada38d43b83e79cca6fd5a3 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Tue, 4 Nov 2008 12:52:44 -0200 Subject: [PATCH 105/173] Revert "x86: default to reboot via ACPI" This reverts commit c7ffa6c26277b403920e2255d10df849bd613380. the assumptio of this change was that this would not break any existing machine. Andrey Borzenkov reported troubles with the ACPI reboot method: the system would hang on reboot, necessiating a power cycle. Probably more systems are affected as well. Also, there are patches queued up for v2.6.29 to disable virtualization on emergency_restart() - which was the original motivation of this change. Reported-by: Andrey Borzenkov Bisected-by: Andrey Borzenkov Signed-off-by: Eduardo Habkost Acked-by: Avi Kivity Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f4c93f1cfc19..724adfc63cb9 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -29,11 +29,7 @@ EXPORT_SYMBOL(pm_power_off); static const struct desc_ptr no_idt = {}; static int reboot_mode; -/* - * Keyboard reset and triple fault may result in INIT, not RESET, which - * doesn't work when we're in vmx root mode. Try ACPI first. - */ -enum reboot_type reboot_type = BOOT_ACPI; +enum reboot_type reboot_type = BOOT_KBD; int reboot_force; #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) From ab4f2ee130d5ffcf35616e1f5c6ab75af5b463b6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 6 Nov 2008 17:11:07 +0000 Subject: [PATCH 106/173] [ARM] fix naming of MODULE_START / MODULE_END As of 73bdf0a60e607f4b8ecc5aec597105976565a84f, the kernel needs to know where modules are located in the virtual address space. On ARM, we located this region between MODULE_START and MODULE_END. Unfortunately, everyone else calls it MODULES_VADDR and MODULES_END. Update ARM to use the same naming, so is_vmalloc_or_module_addr() can work properly. Also update the comment on mm/vmalloc.c to reflect that ARM also places modules in a separate region from the vmalloc space. Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 12 ++++++------ arch/arm/kernel/module.c | 8 ++++---- arch/arm/mm/mmu.c | 4 ++-- mm/vmalloc.c | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 809ff9ab853a..77764301844b 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -44,10 +44,10 @@ * The module space lives between the addresses given by TASK_SIZE * and PAGE_OFFSET - it must be within 32MB of the kernel text. */ -#define MODULE_END (PAGE_OFFSET) -#define MODULE_START (MODULE_END - 16*1048576) +#define MODULES_END (PAGE_OFFSET) +#define MODULES_VADDR (MODULES_END - 16*1048576) -#if TASK_SIZE > MODULE_START +#if TASK_SIZE > MODULES_VADDR #error Top of user space clashes with start of module space #endif @@ -56,7 +56,7 @@ * Since we use sections to map it, this macro replaces the physical address * with its virtual address while keeping offset from the base section. */ -#define XIP_VIRT_ADDR(physaddr) (MODULE_START + ((physaddr) & 0x000fffff)) +#define XIP_VIRT_ADDR(physaddr) (MODULES_VADDR + ((physaddr) & 0x000fffff)) /* * Allow 16MB-aligned ioremap pages @@ -94,8 +94,8 @@ /* * The module can be at any place in ram in nommu mode. */ -#define MODULE_END (END_MEM) -#define MODULE_START (PHYS_OFFSET) +#define MODULES_END (END_MEM) +#define MODULES_VADDR (PHYS_OFFSET) #endif /* !CONFIG_MMU */ diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 9203ba7d58ee..b8d965dcd6fd 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -26,12 +26,12 @@ /* * The XIP kernel text is mapped in the module area for modules and * some other stuff to work without any indirect relocations. - * MODULE_START is redefined here and not in asm/memory.h to avoid + * MODULES_VADDR is redefined here and not in asm/memory.h to avoid * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off. */ extern void _etext; -#undef MODULE_START -#define MODULE_START (((unsigned long)&_etext + ~PGDIR_MASK) & PGDIR_MASK) +#undef MODULES_VADDR +#define MODULES_VADDR (((unsigned long)&_etext + ~PGDIR_MASK) & PGDIR_MASK) #endif #ifdef CONFIG_MMU @@ -43,7 +43,7 @@ void *module_alloc(unsigned long size) if (!size) return NULL; - area = __get_vm_area(size, VM_ALLOC, MODULE_START, MODULE_END); + area = __get_vm_area(size, VM_ALLOC, MODULES_VADDR, MODULES_END); if (!area) return NULL; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 8ba754064559..34e53596ff1d 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -654,7 +654,7 @@ static inline void prepare_page_table(struct meminfo *mi) /* * Clear out all the mappings below the kernel image. */ - for (addr = 0; addr < MODULE_START; addr += PGDIR_SIZE) + for (addr = 0; addr < MODULES_VADDR; addr += PGDIR_SIZE) pmd_clear(pmd_off_k(addr)); #ifdef CONFIG_XIP_KERNEL @@ -766,7 +766,7 @@ static void __init devicemaps_init(struct machine_desc *mdesc) */ #ifdef CONFIG_XIP_KERNEL map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK); - map.virtual = MODULE_START; + map.virtual = MODULES_VADDR; map.length = ((unsigned long)&_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK; map.type = MT_ROM; create_mapping(&map); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index f1cc03bbf6ac..66fad3fc02b1 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -178,7 +178,7 @@ static int vmap_page_range(unsigned long addr, unsigned long end, static inline int is_vmalloc_or_module_addr(const void *x) { /* - * x86-64 and sparc64 put modules in a special place, + * ARM, x86-64 and sparc64 put modules in a special place, * and fall back on vmalloc() if that fails. Others * just put it in the vmalloc space. */ From b1cce6b1b2785fd61454b47ceacb461815407662 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 4 Nov 2008 10:52:28 +0000 Subject: [PATCH 107/173] [ARM] mm: fix page table initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a result of the ptebits changes, we ended up marking device mappings as normal memory on ARMv7 CPUs, resulting in undesirable behaviour with serial ports and the like. While reviewing the section mapping table entries, other errors in the memory type settings for devices were detected and confirmed to prevent Xscale3 platforms booting. Tested on: OMAP34xx (ARMv7), OMAP24xx (ARMv6), OMAP16xx (ARM926T, ARMv5), PXA311 (Xscale3), PXA272 (Xscale), PXA255 (Xscale), IXP42x (Xscale), S3C2410 (ARM920T, ARMv4T), ARM720T (ARMv4T) StrongARM-110 (ARMv4) Acked-by: Tony Lindgren Tested-by: Robert Jarzmik Tested-by: Mike Rapoport Tested-by: Ben Dooks Tested-by: Anders Grafström Signed-off-by: Russell King --- arch/arm/include/asm/system.h | 4 ++ arch/arm/mm/mmu.c | 107 ++++++++++++++++++++++++---------- arch/arm/mm/proc-v7.S | 12 ++-- 3 files changed, 87 insertions(+), 36 deletions(-) diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 7aad78420f18..568020b34e3e 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -42,6 +42,10 @@ #define CR_U (1 << 22) /* Unaligned access operation */ #define CR_XP (1 << 23) /* Extended page tables */ #define CR_VE (1 << 24) /* Vectored interrupts */ +#define CR_EE (1 << 25) /* Exception (Big) Endian */ +#define CR_TRE (1 << 28) /* TEX remap enable */ +#define CR_AFE (1 << 29) /* Access flag enable */ +#define CR_TE (1 << 30) /* Thumb exception enable */ /* * This is used to ensure the compiler did actually allocate the register we diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 34e53596ff1d..e63db11f16a8 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -180,20 +180,20 @@ void adjust_cr(unsigned long mask, unsigned long set) #endif #define PROT_PTE_DEVICE L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_WRITE -#define PROT_SECT_DEVICE PMD_TYPE_SECT|PMD_SECT_XN|PMD_SECT_AP_WRITE +#define PROT_SECT_DEVICE PMD_TYPE_SECT|PMD_SECT_AP_WRITE static struct mem_type mem_types[] = { [MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */ .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED | L_PTE_SHARED, .prot_l1 = PMD_TYPE_TABLE, - .prot_sect = PROT_SECT_DEVICE | PMD_SECT_UNCACHED, + .prot_sect = PROT_SECT_DEVICE | PMD_SECT_S, .domain = DOMAIN_IO, }, [MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */ .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED, .prot_l1 = PMD_TYPE_TABLE, - .prot_sect = PROT_SECT_DEVICE | PMD_SECT_TEX(2), + .prot_sect = PROT_SECT_DEVICE, .domain = DOMAIN_IO, }, [MT_DEVICE_CACHED] = { /* ioremap_cached */ @@ -205,7 +205,7 @@ static struct mem_type mem_types[] = { [MT_DEVICE_WC] = { /* ioremap_wc */ .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC, .prot_l1 = PMD_TYPE_TABLE, - .prot_sect = PROT_SECT_DEVICE | PMD_SECT_BUFFERABLE, + .prot_sect = PROT_SECT_DEVICE, .domain = DOMAIN_IO, }, [MT_CACHECLEAN] = { @@ -273,22 +273,23 @@ static void __init build_mem_type_table(void) #endif /* - * On non-Xscale3 ARMv5-and-older systems, use CB=01 - * (Uncached/Buffered) for ioremap_wc() mappings. On XScale3 - * and ARMv6+, use TEXCB=00100 mappings (Inner/Outer Uncacheable - * in xsc3 parlance, Uncached Normal in ARMv6 parlance). + * Strip out features not present on earlier architectures. + * Pre-ARMv5 CPUs don't have TEX bits. Pre-ARMv6 CPUs or those + * without extended page tables don't have the 'Shared' bit. */ - if (cpu_is_xsc3() || cpu_arch >= CPU_ARCH_ARMv6) { - mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); - mem_types[MT_DEVICE_WC].prot_sect &= ~PMD_SECT_BUFFERABLE; - } + if (cpu_arch < CPU_ARCH_ARMv5) + for (i = 0; i < ARRAY_SIZE(mem_types); i++) + mem_types[i].prot_sect &= ~PMD_SECT_TEX(7); + if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3()) + for (i = 0; i < ARRAY_SIZE(mem_types); i++) + mem_types[i].prot_sect &= ~PMD_SECT_S; /* - * ARMv5 and lower, bit 4 must be set for page tables. - * (was: cache "update-able on write" bit on ARM610) - * However, Xscale cores require this bit to be cleared. + * ARMv5 and lower, bit 4 must be set for page tables (was: cache + * "update-able on write" bit on ARM610). However, Xscale and + * Xscale3 require this bit to be cleared. */ - if (cpu_is_xscale()) { + if (cpu_is_xscale() || cpu_is_xsc3()) { for (i = 0; i < ARRAY_SIZE(mem_types); i++) { mem_types[i].prot_sect &= ~PMD_BIT4; mem_types[i].prot_l1 &= ~PMD_BIT4; @@ -302,6 +303,64 @@ static void __init build_mem_type_table(void) } } + /* + * Mark the device areas according to the CPU/architecture. + */ + if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) { + if (!cpu_is_xsc3()) { + /* + * Mark device regions on ARMv6+ as execute-never + * to prevent speculative instruction fetches. + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN; + } + if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { + /* + * For ARMv7 with TEX remapping, + * - shared device is SXCB=1100 + * - nonshared device is SXCB=0100 + * - write combine device mem is SXCB=0001 + * (Uncached Normal memory) + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1); + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE; + } else if (cpu_is_xsc3()) { + /* + * For Xscale3, + * - shared device is TEXCB=00101 + * - nonshared device is TEXCB=01000 + * - write combine device mem is TEXCB=00100 + * (Inner/Outer Uncacheable in xsc3 parlance) + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); + } else { + /* + * For ARMv6 and ARMv7 without TEX remapping, + * - shared device is TEXCB=00001 + * - nonshared device is TEXCB=01000 + * - write combine device mem is TEXCB=00100 + * (Uncached Normal in ARMv6 parlance). + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); + } + } else { + /* + * On others, write combining is "Uncached/Buffered" + */ + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE; + } + + /* + * Now deal with the memory-type mappings + */ cp = &cache_policies[cachepolicy]; vecs_pgprot = kern_pgprot = user_pgprot = cp->pte; @@ -317,12 +376,8 @@ static void __init build_mem_type_table(void) * Enable CPU-specific coherency if supported. * (Only available on XSC3 at the moment.) */ - if (arch_is_coherent()) { - if (cpu_is_xsc3()) { - mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; - mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; - } - } + if (arch_is_coherent() && cpu_is_xsc3()) + mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; /* * ARMv6 and above have extended page tables. @@ -336,11 +391,6 @@ static void __init build_mem_type_table(void) mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; - /* - * Mark the device area as "shared device" - */ - mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED; - #ifdef CONFIG_SMP /* * Mark memory with the "shared" attribute for SMP systems @@ -360,9 +410,6 @@ static void __init build_mem_type_table(void) mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot; mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot; - if (cpu_arch < CPU_ARCH_ARMv5) - mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1); - pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot); pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_WRITE | diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 07f82db70945..4d3c0a73e7fb 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -115,7 +115,7 @@ ENTRY(cpu_v7_set_pte_ext) orr r3, r3, r2 orr r3, r3, #PTE_EXT_AP0 | 2 - tst r2, #1 << 4 + tst r1, #1 << 4 orrne r3, r3, #PTE_EXT_TEX(1) tst r1, #L_PTE_WRITE @@ -192,11 +192,11 @@ __v7_setup: mov pc, lr @ return to head.S:__ret ENDPROC(__v7_setup) - /* - * V X F I D LR - * .... ...E PUI. .T.T 4RVI ZFRS BLDP WCAM - * rrrr rrrx xxx0 0101 xxxx xxxx x111 xxxx < forced - * 0 110 0011 1.00 .111 1101 < we want + /* AT + * TFR EV X F I D LR + * .EEE ..EE PUI. .T.T 4RVI ZFRS BLDP WCAM + * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced + * 1 0 110 0011 1.00 .111 1101 < we want */ .type v7_crval, #object v7_crval: From c7cf72dcadbe39c2077b32460f86c9f8167be3be Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 6 Nov 2008 17:43:55 -0700 Subject: [PATCH 108/173] [ARM] xsc3: fix xsc3_l2_inv_range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When 'start' and 'end' are less than a cacheline apart and 'start' is unaligned we are done after cleaning and invalidating the first cacheline. So check for (start < end) which will not walk off into invalid address ranges when (start > end). This issue was caught by drivers/dma/dmatest. 2.6.27 is susceptible. Cc: Cc: Haavard Skinnemoen Cc: Lothar WaÃ<9f>mann Cc: Lennert Buytenhek Cc: Eric Miao Signed-off-by: Dan Williams --- arch/arm/mm/cache-xsc3l2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c index 10b1bae1a258..464de893a988 100644 --- a/arch/arm/mm/cache-xsc3l2.c +++ b/arch/arm/mm/cache-xsc3l2.c @@ -98,7 +98,7 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end) /* * Clean and invalidate partial last cache line. */ - if (end & (CACHE_LINE_SIZE - 1)) { + if (start < end && (end & (CACHE_LINE_SIZE - 1))) { xsc3_l2_clean_pa(end & ~(CACHE_LINE_SIZE - 1)); xsc3_l2_inv_pa(end & ~(CACHE_LINE_SIZE - 1)); end &= ~(CACHE_LINE_SIZE - 1); @@ -107,7 +107,7 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end) /* * Invalidate all full cache lines between 'start' and 'end'. */ - while (start != end) { + while (start < end) { xsc3_l2_inv_pa(start); start += CACHE_LINE_SIZE; } From 934f6c3f8e7f5d6a6d07ae2df283fd02393019dd Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Thu, 6 Nov 2008 15:49:04 -0500 Subject: [PATCH 109/173] Revert "ath5k: honor FIF_BCN_PRBRESP_PROMISC in STA mode" Unfortunately, the result was that mac80211 didn't see all the beacons it actually wanted to see. This caused lost associations. Hopefully we can revisit this when mac80211 is less greedy about seeing beacons directly... This reverts commit 063279062a8c530cc90fb77797db16c49c905b26. Signed-off-by: John W. Linville --- drivers/net/wireless/ath5k/base.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c index 9e47d727e220..cfd4d052d666 100644 --- a/drivers/net/wireless/ath5k/base.c +++ b/drivers/net/wireless/ath5k/base.c @@ -2942,8 +2942,10 @@ static void ath5k_configure_filter(struct ieee80211_hw *hw, sc->opmode != NL80211_IFTYPE_MESH_POINT && test_bit(ATH_STAT_PROMISC, sc->status)) rfilt |= AR5K_RX_FILTER_PROM; - if (sc->opmode == NL80211_IFTYPE_ADHOC) + if (sc->opmode == NL80211_IFTYPE_STATION || + sc->opmode == NL80211_IFTYPE_ADHOC) { rfilt |= AR5K_RX_FILTER_BEACON; + } /* Set filters */ ath5k_hw_set_rx_filter(ah,rfilt); From 502c12e1ef14967e08dabb04c674cf0f000e8f7e Mon Sep 17 00:00:00 2001 From: Mohamed Abbas Date: Thu, 23 Oct 2008 23:48:54 -0700 Subject: [PATCH 110/173] iwlwifi: clear scanning bits upon failure In iwl_bg_request_scan function, if we could not send a scan command it will go to done. In done it does the right thing to call mac80211 with scan complete, but the problem is STATUS_SCAN_HW is still set causing any future scan to fail. Fix by clearing the scanning status bits if scan fails. Signed-off-by: Mohamed Abbas Signed-off-by: Reinette Chatre Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-scan.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c index 3b0bee331a33..c89365e2ca58 100644 --- a/drivers/net/wireless/iwlwifi/iwl-scan.c +++ b/drivers/net/wireless/iwlwifi/iwl-scan.c @@ -896,6 +896,13 @@ static void iwl_bg_request_scan(struct work_struct *data) return; done: + /* Cannot perform scan. Make sure we clear scanning + * bits from status so next scan request can be performed. + * If we don't clear scanning status bit here all next scan + * will fail + */ + clear_bit(STATUS_SCAN_HW, &priv->status); + clear_bit(STATUS_SCANNING, &priv->status); /* inform mac80211 scan aborted */ queue_work(priv->workqueue, &priv->scan_completed); mutex_unlock(&priv->mutex); From 964d2777438bf7687324243d38ade538d9bbfe3c Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Thu, 30 Oct 2008 14:12:21 -0400 Subject: [PATCH 111/173] iwlagn: avoid sleep in softirq context __ieee80211_tasklet_handler -> __ieee80211_rx -> __ieee80211_rx_handle_packet -> ieee80211_invoke_rx_handlers -> ieee80211_rx_h_decrypt -> ieee80211_crypto_tkip_decrypt -> ieee80211_tkip_decrypt_data -> iwl4965_mac_update_tkip_key -> iwl_scan_cancel_timeout -> msleep Ooops! Avoid the sleep by changing iwl_scan_cancel_timeout with iwl_scan_cancel and simply returning on failure if the scan persists. This will cause hardware decryption to fail and we'll handle a few more frames with software decryption. Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-agn.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 321dbc8c034a..8d690a0eb1a9 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -3252,7 +3252,11 @@ static void iwl4965_mac_update_tkip_key(struct ieee80211_hw *hw, return; } - iwl_scan_cancel_timeout(priv, 100); + if (iwl_scan_cancel(priv)) { + /* cancel scan failed, just live w/ bad key and rely + briefly on SW decryption */ + return; + } key_flags |= (STA_KEY_FLG_TKIP | STA_KEY_FLG_MAP_KEY_MSK); key_flags |= cpu_to_le16(keyconf->keyidx << STA_KEY_FLG_KEYID_POS); From 4a9d916717de0aab4313d43817164577255242fb Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Thu, 30 Oct 2008 22:46:48 +0000 Subject: [PATCH 112/173] Fix logic error in rfkill_check_duplicity > I'll have a prod at why the [hso] rfkill stuff isn't working next Ok, I believe this is due to the addition of rfkill_check_duplicity in rfkill and the fact that test_bit actually returns a negative value rather than the postive one expected (which is of course equally true). So when the second WLAN device (the hso device, with the EEE PC WLAN being the first) comes along rfkill_check_duplicity returns a negative value and so rfkill_register returns an error. Patch below fixes this for me. Signed-Off-By: Jonathan McDowell Acked-by: Henrique de Moraes Holschuh Signed-off-by: John W. Linville --- net/rfkill/rfkill.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index f949a482b007..25ba3bd57e66 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -603,7 +603,7 @@ static int rfkill_check_duplicity(const struct rfkill *rfkill) } /* 0: first switch of its kind */ - return test_bit(rfkill->type, seen); + return (test_bit(rfkill->type, seen)) ? 1 : 0; } static int rfkill_add_switch(struct rfkill *rfkill) From 0feec9dfe7b8880ab3b4c38d7cc4107dd706ea7f Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Sat, 1 Nov 2008 17:03:48 +0000 Subject: [PATCH 113/173] zd1211rw: Add 2 device IDs 07fa/1196 Bewan BWIFI-USB54AR: Tested by night1308, this device is a ZD1211B with an AL2230S radio. 0ace/b215 HP 802.11abg: Tested by Robert Philippe Signed-off-by: John W. Linville --- drivers/net/wireless/zd1211rw/zd_usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c index a60ae86bd5c9..a3ccd8c1c716 100644 --- a/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zd1211rw/zd_usb.c @@ -61,6 +61,7 @@ static struct usb_device_id usb_ids[] = { { USB_DEVICE(0x0105, 0x145f), .driver_info = DEVICE_ZD1211 }, /* ZD1211B */ { USB_DEVICE(0x0ace, 0x1215), .driver_info = DEVICE_ZD1211B }, + { USB_DEVICE(0x0ace, 0xb215), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x157e, 0x300d), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x079b, 0x0062), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x1582, 0x6003), .driver_info = DEVICE_ZD1211B }, @@ -82,6 +83,7 @@ static struct usb_device_id usb_ids[] = { { USB_DEVICE(0x0cde, 0x001a), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x0586, 0x340a), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x0471, 0x1237), .driver_info = DEVICE_ZD1211B }, + { USB_DEVICE(0x07fa, 0x1196), .driver_info = DEVICE_ZD1211B }, /* "Driverless" devices that need ejecting */ { USB_DEVICE(0x0ace, 0x2011), .driver_info = DEVICE_INSTALLER }, { USB_DEVICE(0x0ace, 0x20ff), .driver_info = DEVICE_INSTALLER }, From c793033945bea23d7a6e0d8d94b2da6603e02af2 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Mon, 3 Nov 2008 22:14:00 -0500 Subject: [PATCH 114/173] ath5k: correct handling of rx status fields ath5k_rx_status fields rs_antenna and rs_more are u8s, but we were setting them with bitwise ANDs of 32-bit values. As a consequence, jumbo frames would not be discarded as intended. Then, because the hw rate value of such frames is zero, and, since "ath5k: rates cleanup", we do not fall back to the basic rate, such packets would trigger the following WARN_ON: ------------[ cut here ]------------ WARNING: at net/mac80211/rx.c:2192 __ieee80211_rx+0x4d/0x57e [mac80211]() Modules linked in: ath5k af_packet sha256_generic aes_i586 aes_generic cbc loop i915 drm binfmt_misc acpi_cpufreq fan container nls_utf8 hfsplus dm_crypt dm_mod kvm_intel kvm fuse sbp2 snd_hda_intel snd_pcm_oss snd_pcm snd_mixer_oss snd_seq_dummy snd_seq_oss arc4 joydev hid_apple ecb snd_seq_midi snd_rawmidi snd_seq_midi_event snd_seq snd_timer snd_seq_device usbhid appletouch mac80211 sky2 snd ehci_hcd ohci1394 bitrev crc32 sr_mod cdrom rtc sg uhci_hcd snd_page_alloc cfg80211 ieee1394 thermal ac battery processor button evdev unix [last unloaded: ath5k] Pid: 0, comm: swapper Tainted: G W 2.6.28-rc2-wl #14 Call Trace: [] warn_on_slowpath+0x41/0x5b [] ? sched_debug_show+0x31e/0x9c6 [] ? vprintk+0x369/0x389 [] ? _spin_unlock_irqrestore+0x54/0x58 [] ? try_to_wake_up+0x14f/0x15a [] __ieee80211_rx+0x4d/0x57e [mac80211] [] ath5k_tasklet_rx+0x5a1/0x5e4 [ath5k] [] ? clockevents_program_event+0xd4/0xe3 [] tasklet_action+0x94/0xfd [] __do_softirq+0x8c/0x13e [] do_softirq+0x39/0x55 [] irq_exit+0x46/0x85 [] do_IRQ+0x9a/0xb2 [] common_interrupt+0x28/0x30 [] ? acpi_idle_enter_bm+0x2ad/0x31b [processor] [] cpuidle_idle_call+0x65/0x9a [] cpu_idle+0x76/0xa6 [] rest_init+0x62/0x64 Signed-off-by: Bob Copeland Signed-off-by: John W. Linville --- drivers/net/wireless/ath5k/desc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath5k/desc.c b/drivers/net/wireless/ath5k/desc.c index dd1374052ba9..5e362a7a3620 100644 --- a/drivers/net/wireless/ath5k/desc.c +++ b/drivers/net/wireless/ath5k/desc.c @@ -531,10 +531,10 @@ static int ath5k_hw_proc_5210_rx_status(struct ath5k_hw *ah, AR5K_5210_RX_DESC_STATUS0_RECEIVE_SIGNAL); rs->rs_rate = AR5K_REG_MS(rx_status->rx_status_0, AR5K_5210_RX_DESC_STATUS0_RECEIVE_RATE); - rs->rs_antenna = rx_status->rx_status_0 & - AR5K_5210_RX_DESC_STATUS0_RECEIVE_ANTENNA; - rs->rs_more = rx_status->rx_status_0 & - AR5K_5210_RX_DESC_STATUS0_MORE; + rs->rs_antenna = AR5K_REG_MS(rx_status->rx_status_0, + AR5K_5210_RX_DESC_STATUS0_RECEIVE_ANTENNA); + rs->rs_more = !!(rx_status->rx_status_0 & + AR5K_5210_RX_DESC_STATUS0_MORE); /* TODO: this timestamp is 13 bit, later on we assume 15 bit */ rs->rs_tstamp = AR5K_REG_MS(rx_status->rx_status_1, AR5K_5210_RX_DESC_STATUS1_RECEIVE_TIMESTAMP); @@ -607,10 +607,10 @@ static int ath5k_hw_proc_5212_rx_status(struct ath5k_hw *ah, AR5K_5212_RX_DESC_STATUS0_RECEIVE_SIGNAL); rs->rs_rate = AR5K_REG_MS(rx_status->rx_status_0, AR5K_5212_RX_DESC_STATUS0_RECEIVE_RATE); - rs->rs_antenna = rx_status->rx_status_0 & - AR5K_5212_RX_DESC_STATUS0_RECEIVE_ANTENNA; - rs->rs_more = rx_status->rx_status_0 & - AR5K_5212_RX_DESC_STATUS0_MORE; + rs->rs_antenna = AR5K_REG_MS(rx_status->rx_status_0, + AR5K_5212_RX_DESC_STATUS0_RECEIVE_ANTENNA); + rs->rs_more = !!(rx_status->rx_status_0 & + AR5K_5212_RX_DESC_STATUS0_MORE); rs->rs_tstamp = AR5K_REG_MS(rx_status->rx_status_1, AR5K_5212_RX_DESC_STATUS1_RECEIVE_TIMESTAMP); rs->rs_status = 0; From 2420ebc104d38567ee977a3c15dc675a9dd3b07c Mon Sep 17 00:00:00 2001 From: Mohamed Abbas Date: Tue, 4 Nov 2008 12:21:34 -0800 Subject: [PATCH 115/173] iwl3945: clear scanning bits upon failure This patch ensures we clear any scan status bit when an error occurs while sending the scan command. It is the implementation of patch: "iwlwifi: clear scanning bits upon failure" for iwl3945. Signed-off-by: Mohamed Abbas Signed-off-by: Reinette Chatre Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl3945-base.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index d15a2c997954..b9eac5551d82 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -6273,6 +6273,14 @@ static void iwl3945_bg_request_scan(struct work_struct *data) return; done: + /* can not perform scan make sure we clear scanning + * bits from status so next scan request can be performed. + * if we dont clear scanning status bit here all next scan + * will fail + */ + clear_bit(STATUS_SCAN_HW, &priv->status); + clear_bit(STATUS_SCANNING, &priv->status); + /* inform mac80211 scan aborted */ queue_work(priv->workqueue, &priv->scan_completed); mutex_unlock(&priv->mutex); From 14b5433606289dbc5b6fd70ced11462f80e95003 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Tue, 4 Nov 2008 12:21:35 -0800 Subject: [PATCH 116/173] iwl3945: do not send scan command if channel count zero Do not send scan command if no channels to scan. This avoids a Microcode error as reported in: http://www.intellinuxwireless.org/bugzilla/show_bug.cgi?id=1650 http://bugzilla.kernel.org/show_bug.cgi?id=11806 http://marc.info/?l=linux-wireless&m=122437145211886&w=2 Signed-off-by: Reinette Chatre Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl3945-base.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index b9eac5551d82..81dfcb882857 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -6256,6 +6256,11 @@ static void iwl3945_bg_request_scan(struct work_struct *data) n_probes, (void *)&scan->data[le16_to_cpu(scan->tx_cmd.len)]); + if (scan->channel_count == 0) { + IWL_DEBUG_SCAN("channel count %d\n", scan->channel_count); + goto done; + } + cmd.len += le16_to_cpu(scan->tx_cmd.len) + scan->channel_count * sizeof(struct iwl3945_scan_channel); cmd.data = scan; From d54bc4e3fc5c56600a13c9ebc0a7e1077ac05d59 Mon Sep 17 00:00:00 2001 From: "Zhu, Yi" Date: Tue, 4 Nov 2008 12:21:36 -0800 Subject: [PATCH 117/173] iwl3945: fix deadlock on suspend This patch fixes iwl3945 deadlock during suspend by moving notify_mac out of iwl3945 mutex. This is a portion of the same fix for iwlwifi by Tomas. Signed-off-by: Zhu Yi Signed-off-by: Tomas Winkler Signed-off-by: Reinette Chatre Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl3945-base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 81dfcb882857..285b53e7e261 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -5768,7 +5768,6 @@ static void iwl3945_alive_start(struct iwl3945_priv *priv) if (priv->error_recovering) iwl3945_error_recovery(priv); - ieee80211_notify_mac(priv->hw, IEEE80211_NOTIFY_RE_ASSOC); return; restart: @@ -6013,6 +6012,7 @@ static void iwl3945_bg_alive_start(struct work_struct *data) mutex_lock(&priv->mutex); iwl3945_alive_start(priv); mutex_unlock(&priv->mutex); + ieee80211_notify_mac(priv->hw, IEEE80211_NOTIFY_RE_ASSOC); } static void iwl3945_bg_rf_kill(struct work_struct *work) From f8d570a4745835f2238a33b537218a1bb03fc671 Mon Sep 17 00:00:00 2001 From: David Miller Date: Thu, 6 Nov 2008 00:37:40 -0800 Subject: [PATCH 118/173] net: Fix recursive descent in __scm_destroy(). __scm_destroy() walks the list of file descriptors in the scm_fp_list pointed to by the scm_cookie argument. Those, in turn, can close sockets and invoke __scm_destroy() again. There is nothing which limits how deeply this can occur. The idea for how to fix this is from Linus. Basically, we do all of the fput()s at the top level by collecting all of the scm_fp_list objects hit by an fput(). Inside of the initial __scm_destroy() we keep running the list until it is empty. Signed-off-by: David S. Miller Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 ++ include/net/scm.h | 5 +++-- net/core/scm.c | 24 +++++++++++++++++++++--- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..295b7c756ca6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1349,6 +1349,8 @@ struct task_struct { */ unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; + + struct list_head *scm_work_list; }; /* diff --git a/include/net/scm.h b/include/net/scm.h index 06df126103ca..33e9986beb86 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -14,8 +14,9 @@ struct scm_fp_list { - int count; - struct file *fp[SCM_MAX_FD]; + struct list_head list; + int count; + struct file *fp[SCM_MAX_FD]; }; struct scm_cookie diff --git a/net/core/scm.c b/net/core/scm.c index 10f5c65f6a47..ab242cc1acca 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -75,6 +75,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (!fpl) return -ENOMEM; *fplp = fpl; + INIT_LIST_HEAD(&fpl->list); fpl->count = 0; } fpp = &fpl->fp[fpl->count]; @@ -106,9 +107,25 @@ void __scm_destroy(struct scm_cookie *scm) if (fpl) { scm->fp = NULL; - for (i=fpl->count-1; i>=0; i--) - fput(fpl->fp[i]); - kfree(fpl); + if (current->scm_work_list) { + list_add_tail(&fpl->list, current->scm_work_list); + } else { + LIST_HEAD(work_list); + + current->scm_work_list = &work_list; + + list_add(&fpl->list, &work_list); + while (!list_empty(&work_list)) { + fpl = list_first_entry(&work_list, struct scm_fp_list, list); + + list_del(&fpl->list); + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); + } + + current->scm_work_list = NULL; + } } } @@ -284,6 +301,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); if (new_fpl) { + INIT_LIST_HEAD(&new_fpl->list); for (i=fpl->count-1; i>=0; i--) get_file(fpl->fp[i]); memcpy(new_fpl, fpl, sizeof(*fpl)); From 77ca7286d10b798e4907af941f29672bf484db77 Mon Sep 17 00:00:00 2001 From: Mike Miller Date: Thu, 6 Nov 2008 12:53:14 -0800 Subject: [PATCH 119/173] cciss: new hardware support Add support for 2 new SAS/SATA controllers. Signed-off-by: Mike Miller Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cciss.txt | 2 ++ drivers/block/cciss.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/Documentation/cciss.txt b/Documentation/cciss.txt index 8244c6442faa..48d80d95f0f8 100644 --- a/Documentation/cciss.txt +++ b/Documentation/cciss.txt @@ -26,6 +26,8 @@ This driver is known to work with the following cards: * SA P410i * SA P411 * SA P812 + * SA P712m + * SA P711m Detecting drive failures: ------------------------- diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 4023885353e0..00048bd26e7d 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -96,6 +96,8 @@ static const struct pci_device_id cciss_pci_device_id[] = { {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3245}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3247}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3249}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324A}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324B}, {PCI_VENDOR_ID_HP, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_RAID << 8, 0xffff << 8, 0}, {0,} @@ -133,6 +135,8 @@ static struct board_type products[] = { {0x3245103C, "Smart Array P410i", &SA5_access}, {0x3247103C, "Smart Array P411", &SA5_access}, {0x3249103C, "Smart Array P812", &SA5_access}, + {0x324A103C, "Smart Array P712m", &SA5_access}, + {0x324B103C, "Smart Array P711m", &SA5_access}, {0xFFFF103C, "Unknown Smart Array", &SA5_access}, }; From 2197d18ded232ef6eef63cce57b6b21eddf1b7b6 Mon Sep 17 00:00:00 2001 From: Andrey Borzenkov Date: Thu, 6 Nov 2008 12:53:15 -0800 Subject: [PATCH 120/173] cpqarry: fix return value of cpqarray_init() As reported by Dick Gevers on Compaq ProLiant: Oct 13 18:06:51 dvgcpl kernel: Compaq SMART2 Driver (v 2.6.0) Oct 13 18:06:51 dvgcpl kernel: sys_init_module: 'cpqarray'->init suspiciously returned 1, it should follow 0/-E convention Oct 13 18:06:51 dvgcpl kernel: sys_init_module: loading module anyway... Oct 13 18:06:51 dvgcpl kernel: Pid: 315, comm: modprobe Not tainted 2.6.27-desktop-0.rc8.2mnb #1 Oct 13 18:06:51 dvgcpl kernel: [] ? printk+0x18/0x1e Oct 13 18:06:51 dvgcpl kernel: [] sys_init_module+0x155/0x1c0 Oct 13 18:06:51 dvgcpl kernel: [] syscall_call+0x7/0xb Oct 13 18:06:51 dvgcpl kernel: ======================= Make it return 0 on success and -ENODEV if no array was found. Reported-by: Dick Gevers Signed-off-by: Andrey Borzenkov Cc: Jens Axboe Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/cpqarray.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 47d233c6d0b3..5d39df14ed90 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -567,7 +567,12 @@ static int __init cpqarray_init(void) num_cntlrs_reg++; } - return(num_cntlrs_reg); + if (num_cntlrs_reg) + return 0; + else { + pci_unregister_driver(&cpqarray_pci_driver); + return -ENODEV; + } } /* Function to find the first free pointer into our hba[] array */ From a564738c1c9c7b9ed696bf4116267789201ac8ac Mon Sep 17 00:00:00 2001 From: Wolfgang Kroworsch Date: Thu, 6 Nov 2008 12:53:16 -0800 Subject: [PATCH 121/173] vt: incomplete initialization of vc_tab_stop Problem 1 (see patch below): vc_tab_stop is declared as an array of 8 unsigned ints in struct vc_data in include/linux/console_struct.h . In drivers/char/vt.c only 5 of these 8 unsigned ints get initialized leading to unintended tabulator placement on displays with more than 160 columns text. Problem 2 (open): Upcoming displays will have more than 256 columns of text leading to invalid memory access in drivers/char/vt.c during tabulator calculations: if (vc->vc_tab_stop[vc->vc_x >> 5] & (1 << (vc->vc_x & 31))) break; Signed-off-by: Wolfgang Kroworsch Cc: Alan Cox Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/vt.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/char/vt.c b/drivers/char/vt.c index d8f83e26e4a4..a5af6072e2b3 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -1644,7 +1644,10 @@ static void reset_terminal(struct vc_data *vc, int do_clear) vc->vc_tab_stop[1] = vc->vc_tab_stop[2] = vc->vc_tab_stop[3] = - vc->vc_tab_stop[4] = 0x01010101; + vc->vc_tab_stop[4] = + vc->vc_tab_stop[5] = + vc->vc_tab_stop[6] = + vc->vc_tab_stop[7] = 0x01010101; vc->vc_bell_pitch = DEFAULT_BELL_PITCH; vc->vc_bell_duration = DEFAULT_BELL_DURATION; @@ -1935,7 +1938,10 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) vc->vc_tab_stop[1] = vc->vc_tab_stop[2] = vc->vc_tab_stop[3] = - vc->vc_tab_stop[4] = 0; + vc->vc_tab_stop[4] = + vc->vc_tab_stop[5] = + vc->vc_tab_stop[6] = + vc->vc_tab_stop[7] = 0; } return; case 'm': From 9e3a4afd5a66f9047e30ba225525e6ff01612dc4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 6 Nov 2008 12:53:18 -0800 Subject: [PATCH 122/173] rtc: fix handling of missing tm_year data when reading alarms When fixing up invalid years rtc_read_alarm() was calling rtc_valid_tm() as a boolean but rtc_valid_tm() returns zero on success or a negative number if the time is not valid so the test was inverted. Signed-off-by: Mark Brown Acked-by: Alessandro Zummo Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 7af60b98d8a4..a04c1b6b1575 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -271,7 +271,7 @@ int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) dev_dbg(&rtc->dev, "alarm rollover: %s\n", "year"); do { alarm->time.tm_year++; - } while (!rtc_valid_tm(&alarm->time)); + } while (rtc_valid_tm(&alarm->time) != 0); break; default: From 6e3530fa241ae759313496f67295c9252691ed04 Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Thu, 6 Nov 2008 12:53:19 -0800 Subject: [PATCH 123/173] hwmon: applesmc: add support for iMac 5 Add temperature sensor support for iMac 5. Signed-off-by: Henrik Rydberg Tested-by: Ricky Campbell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/applesmc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index bc011da79e14..80d545d3aa12 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -116,6 +116,8 @@ static const char* temperature_sensors_sets[][36] = { /* Set 9: Macbook Pro 3,1 (Santa Rosa) */ { "TALP", "TB0T", "TC0D", "TC0P", "TG0D", "TG0H", "TTF0", "TW0P", "Th0H", "Th1H", "Th2H", "Tm0P", "Ts0P", NULL }, +/* Set 10: iMac 5,1 */ + { "TA0P", "TC0D", "TC0P", "TG0D", "TH0P", "TO0P", "Tm0P", NULL }, }; /* List of keys used to read/write fan speeds */ @@ -1276,6 +1278,8 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = { { .accelerometer = 1, .light = 1, .temperature_set = 8 }, /* MacBook Pro 3: accelerometer, backlight and temperature set 9 */ { .accelerometer = 1, .light = 1, .temperature_set = 9 }, +/* iMac 5: light sensor only, temperature set 10 */ + { .accelerometer = 0, .light = 0, .temperature_set = 10 }, }; /* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1". @@ -1317,6 +1321,10 @@ static __initdata struct dmi_system_id applesmc_whitelist[] = { DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), DMI_MATCH(DMI_PRODUCT_NAME,"MacPro2") }, &applesmc_dmi_data[4]}, + { applesmc_dmi_match, "Apple iMac 5", { + DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), + DMI_MATCH(DMI_PRODUCT_NAME, "iMac5") }, + &applesmc_dmi_data[10]}, { applesmc_dmi_match, "Apple iMac", { DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), DMI_MATCH(DMI_PRODUCT_NAME,"iMac") }, From 181209a1d91756bfd83b1d6ce2008cea3ca225b6 Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Thu, 6 Nov 2008 12:53:20 -0800 Subject: [PATCH 124/173] hwmon: applesmc: add support for Macbook 5 Add accelerometer, backlight and temperature sensor support for the new unibody Macbook 5. Signed-off-by: Henrik Rydberg Tested-by: David M. Lary Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/applesmc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index 80d545d3aa12..074f7f4719f3 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -118,6 +118,9 @@ static const char* temperature_sensors_sets[][36] = { "Th0H", "Th1H", "Th2H", "Tm0P", "Ts0P", NULL }, /* Set 10: iMac 5,1 */ { "TA0P", "TC0D", "TC0P", "TG0D", "TH0P", "TO0P", "Tm0P", NULL }, +/* Set 11: Macbook 5,1 */ + { "TB0T", "TB1T", "TB2T", "TB3T", "TC0D", "TC0P", "TN0D", "TN0P", + "TTF0", "Th0H", "Th1H", "ThFH", "Ts0P", "Ts0S", NULL }, }; /* List of keys used to read/write fan speeds */ @@ -1280,6 +1283,8 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = { { .accelerometer = 1, .light = 1, .temperature_set = 9 }, /* iMac 5: light sensor only, temperature set 10 */ { .accelerometer = 0, .light = 0, .temperature_set = 10 }, +/* MacBook 5: accelerometer, backlight and temperature set 11 */ + { .accelerometer = 1, .light = 1, .temperature_set = 11 }, }; /* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1". @@ -1309,6 +1314,10 @@ static __initdata struct dmi_system_id applesmc_whitelist[] = { DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), DMI_MATCH(DMI_PRODUCT_NAME,"MacBook3") }, &applesmc_dmi_data[6]}, + { applesmc_dmi_match, "Apple MacBook 5", { + DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5") }, + &applesmc_dmi_data[11]}, { applesmc_dmi_match, "Apple MacBook", { DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), DMI_MATCH(DMI_PRODUCT_NAME,"MacBook") }, From a66603257bf88bbe2c9fd6a97ee5dc24de15d196 Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Thu, 6 Nov 2008 12:53:21 -0800 Subject: [PATCH 125/173] hwmon: applesmc: add support for Macbook Pro 5 Add accelerometer, backlight and temperature sensor support for the new unibody Macbook Pro 5. Signed-off-by: Henrik Rydberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/applesmc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index 074f7f4719f3..9f04283beaee 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -121,6 +121,10 @@ static const char* temperature_sensors_sets[][36] = { /* Set 11: Macbook 5,1 */ { "TB0T", "TB1T", "TB2T", "TB3T", "TC0D", "TC0P", "TN0D", "TN0P", "TTF0", "Th0H", "Th1H", "ThFH", "Ts0P", "Ts0S", NULL }, +/* Set 12: Macbook Pro 5,1 */ + { "TB0T", "TB1T", "TB2T", "TB3T", "TC0D", "TC0F", "TC0P", "TG0D", + "TG0F", "TG0H", "TG0P", "TG0T", "TG1H", "TN0D", "TN0P", "TTF0", + "Th2H", "Tm0P", "Ts0P", "Ts0S", NULL }, }; /* List of keys used to read/write fan speeds */ @@ -1285,6 +1289,8 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = { { .accelerometer = 0, .light = 0, .temperature_set = 10 }, /* MacBook 5: accelerometer, backlight and temperature set 11 */ { .accelerometer = 1, .light = 1, .temperature_set = 11 }, +/* MacBook Pro 5: accelerometer, backlight and temperature set 12 */ + { .accelerometer = 1, .light = 1, .temperature_set = 12 }, }; /* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1". @@ -1294,6 +1300,10 @@ static __initdata struct dmi_system_id applesmc_whitelist[] = { DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir") }, &applesmc_dmi_data[7]}, + { applesmc_dmi_match, "Apple MacBook Pro 5", { + DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5") }, + &applesmc_dmi_data[12]}, { applesmc_dmi_match, "Apple MacBook Pro 4", { DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro4") }, From eefc488f96cdde6e152b45675b50bf380b95d99f Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Thu, 6 Nov 2008 12:53:22 -0800 Subject: [PATCH 126/173] hwmon: applesmc: add support for iMac 8 Add temperature sensor support for iMac 8. Signed-off-by: Henrik Rydberg Tested-by: Klaus Doblmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/applesmc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index 9f04283beaee..be3285912cb7 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -125,6 +125,9 @@ static const char* temperature_sensors_sets[][36] = { { "TB0T", "TB1T", "TB2T", "TB3T", "TC0D", "TC0F", "TC0P", "TG0D", "TG0F", "TG0H", "TG0P", "TG0T", "TG1H", "TN0D", "TN0P", "TTF0", "Th2H", "Tm0P", "Ts0P", "Ts0S", NULL }, +/* Set 13: iMac 8,1 */ + { "TA0P", "TC0D", "TC0H", "TC0P", "TG0D", "TG0H", "TG0P", "TH0P", + "TL0P", "TO0P", "TW0P", "Tm0P", "Tp0P", NULL }, }; /* List of keys used to read/write fan speeds */ @@ -1291,6 +1294,8 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = { { .accelerometer = 1, .light = 1, .temperature_set = 11 }, /* MacBook Pro 5: accelerometer, backlight and temperature set 12 */ { .accelerometer = 1, .light = 1, .temperature_set = 12 }, +/* iMac 8: light sensor only, temperature set 13 */ + { .accelerometer = 0, .light = 0, .temperature_set = 13 }, }; /* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1". @@ -1340,6 +1345,10 @@ static __initdata struct dmi_system_id applesmc_whitelist[] = { DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), DMI_MATCH(DMI_PRODUCT_NAME,"MacPro2") }, &applesmc_dmi_data[4]}, + { applesmc_dmi_match, "Apple iMac 8", { + DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), + DMI_MATCH(DMI_PRODUCT_NAME, "iMac8") }, + &applesmc_dmi_data[13]}, { applesmc_dmi_match, "Apple iMac 5", { DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), DMI_MATCH(DMI_PRODUCT_NAME, "iMac5") }, From bc9c4068388eea01d3b5da31016879f2341ecec5 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Thu, 6 Nov 2008 12:53:22 -0800 Subject: [PATCH 127/173] autofs4: correct offset mount expire check When checking a directory tree in autofs_tree_busy() we can incorrectly decide that the tree isn't busy. This happens for the case of an active offset mount as autofs4_follow_mount() follows past the active offset mount, which has an open file handle used for expires, causing the file handle not to count toward the busyness check. Signed-off-by: Ian Kent Signed-off-by: Jeff Moyer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/expire.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index cde2f8e8935a..4b6fb3f628c0 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -56,12 +56,23 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) mntget(mnt); dget(dentry); - if (!autofs4_follow_mount(&mnt, &dentry)) + if (!follow_down(&mnt, &dentry)) goto done; - /* This is an autofs submount, we can't expire it */ - if (is_autofs4_dentry(dentry)) - goto done; + if (is_autofs4_dentry(dentry)) { + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + + /* This is an autofs submount, we can't expire it */ + if (sbi->type == AUTOFS_TYPE_INDIRECT) + goto done; + + /* + * Otherwise it's an offset mount and we need to check + * if we can umount its mount, if there is one. + */ + if (!d_mountpoint(dentry)) + goto done; + } /* Update the expiry counter if fs is busy */ if (!may_umount_tree(mnt)) { From 96b0317906690997c16c7efffbc4c0fafcd6f7f2 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Thu, 6 Nov 2008 12:53:23 -0800 Subject: [PATCH 128/173] autofs4: collect version check return The function check_dev_ioctl_version() returns an error code upon fail but it isn't captured and returned in validate_dev_ioctl() as it should be. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Ian Kent Signed-off-by: Jeff Moyer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/dev-ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 625abf5422e2..33bf8cbfd051 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -128,9 +128,10 @@ static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) */ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) { - int err = -EINVAL; + int err; - if (check_dev_ioctl_version(cmd, param)) { + err = check_dev_ioctl_version(cmd, param); + if (err) { AUTOFS_WARN("invalid device control module version " "supplied for cmd(0x%08x)", cmd); goto out; From 404443081ce5e6f68b5f7eda16c959835ff200c0 Mon Sep 17 00:00:00 2001 From: Mike Miller Date: Thu, 6 Nov 2008 12:53:24 -0800 Subject: [PATCH 129/173] cciss: fix sysfs broken symlink regression Regression introduced by commit 6ae5ce8e8d4de666f31286808d2285aa6a50fa40 ("cciss: remove redundant code"). This patch fixes a broken symlink in sysfs that was introduced by the above commit. We broke it in 2.6.27-rc on or about 20080804. Some installers are broken if this symlink does not exist and they may not detect the logical drives configured on the controller. It does not require being backported into 2.6.26.x or earlier kernels. Signed-off-by: Mike Miller Cc: Jens Axboe Cc: [2.6.27.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/cciss.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 00048bd26e7d..dc38368435aa 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1370,6 +1370,7 @@ static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, disk->first_minor = drv_index << NWD_SHIFT; disk->fops = &cciss_fops; disk->private_data = &h->drv[drv_index]; + disk->driverfs_dev = &h->pdev->dev; /* Set up queue information */ blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask); From 22bece00dc1f28dd3374c55e464c9f02eb642876 Mon Sep 17 00:00:00 2001 From: Mike Miller Date: Thu, 6 Nov 2008 12:53:25 -0800 Subject: [PATCH 130/173] cciss: fix regression firmware not displayed in procfs This regression was introduced by commit 6ae5ce8e8d4de666f31286808d2285aa6a50fa40 ("cciss: remove redundant code"). This patch fixes a regression where the controller firmware version is not displayed in procfs. The previous patch would be called anytime something changed. This will get called only once for each controller. Signed-off-by: Mike Miller Cc: FUJITA Tomonori Cc: Jens Axboe Cc: [2.6.27.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/cciss.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index dc38368435aa..12de1fdaa6c6 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -3409,7 +3409,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, int i; int j = 0; int rc; - int dac; + int dac, return_code; + InquiryData_struct *inq_buff = NULL; i = alloc_cciss_hba(); if (i < 0) @@ -3515,6 +3516,25 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, /* Turn the interrupts on so we can service requests */ hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_ON); + /* Get the firmware version */ + inq_buff = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL); + if (inq_buff == NULL) { + printk(KERN_ERR "cciss: out of memory\n"); + goto clean4; + } + + return_code = sendcmd_withirq(CISS_INQUIRY, i, inq_buff, + sizeof(InquiryData_struct), 0, 0 , 0, TYPE_CMD); + if (return_code == IO_OK) { + hba[i]->firm_ver[0] = inq_buff->data_byte[32]; + hba[i]->firm_ver[1] = inq_buff->data_byte[33]; + hba[i]->firm_ver[2] = inq_buff->data_byte[34]; + hba[i]->firm_ver[3] = inq_buff->data_byte[35]; + } else { /* send command failed */ + printk(KERN_WARNING "cciss: unable to determine firmware" + " version of controller\n"); + } + cciss_procinit(i); hba[i]->cciss_max_sectors = 2048; @@ -3525,6 +3545,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, return 1; clean4: + kfree(inq_buff); #ifdef CONFIG_CISS_SCSI_TAPE kfree(hba[i]->scsi_rejects.complete); #endif From 69d177c2fc702d402b17fdca2190d5a7e3ca55c5 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 6 Nov 2008 12:53:26 -0800 Subject: [PATCH 131/173] hugetlbfs: handle pages higher order than MAX_ORDER When working with hugepages, hugetlbfs assumes that those hugepages are smaller than MAX_ORDER. Specifically it assumes that the mem_map is contigious and uses that to optimise access to the elements of the mem_map that represent the hugepage. Gigantic pages (such as 16GB pages on powerpc) by definition are of greater order than MAX_ORDER (larger than MAX_ORDER_NR_PAGES in size). This means that we can no longer make use of the buddy alloctor guarentees for the contiguity of the mem_map, which ensures that the mem_map is at least contigious for maximmally aligned areas of MAX_ORDER_NR_PAGES pages. This patch adds new mem_map accessors and iterator helpers which handle any discontiguity at MAX_ORDER_NR_PAGES boundaries. It then uses these to implement gigantic page versions of copy_huge_page and clear_huge_page, and to allow follow_hugetlb_page handle gigantic pages. Signed-off-by: Andy Whitcroft Cc: Jon Tollefson Cc: Mel Gorman Cc: Nick Piggin Cc: Christoph Lameter Cc: [2.6.27.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 37 ++++++++++++++++++++++++++++++++++++- mm/internal.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 421aee99b84a..e6afe527bd09 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -354,11 +354,26 @@ static int vma_has_reserves(struct vm_area_struct *vma) return 0; } +static void clear_gigantic_page(struct page *page, + unsigned long addr, unsigned long sz) +{ + int i; + struct page *p = page; + + might_sleep(); + for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) { + cond_resched(); + clear_user_highpage(p, addr + i * PAGE_SIZE); + } +} static void clear_huge_page(struct page *page, unsigned long addr, unsigned long sz) { int i; + if (unlikely(sz > MAX_ORDER_NR_PAGES)) + return clear_gigantic_page(page, addr, sz); + might_sleep(); for (i = 0; i < sz/PAGE_SIZE; i++) { cond_resched(); @@ -366,12 +381,32 @@ static void clear_huge_page(struct page *page, } } +static void copy_gigantic_page(struct page *dst, struct page *src, + unsigned long addr, struct vm_area_struct *vma) +{ + int i; + struct hstate *h = hstate_vma(vma); + struct page *dst_base = dst; + struct page *src_base = src; + might_sleep(); + for (i = 0; i < pages_per_huge_page(h); ) { + cond_resched(); + copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma); + + i++; + dst = mem_map_next(dst, dst_base, i); + src = mem_map_next(src, src_base, i); + } +} static void copy_huge_page(struct page *dst, struct page *src, unsigned long addr, struct vm_area_struct *vma) { int i; struct hstate *h = hstate_vma(vma); + if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) + return copy_gigantic_page(dst, src, addr, vma); + might_sleep(); for (i = 0; i < pages_per_huge_page(h); i++) { cond_resched(); @@ -2130,7 +2165,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, if (zeropage_ok) pages[i] = ZERO_PAGE(0); else - pages[i] = page + pfn_offset; + pages[i] = mem_map_offset(page, pfn_offset); get_page(pages[i]); } diff --git a/mm/internal.h b/mm/internal.h index e4e728bdf324..f482460de3e6 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -175,6 +175,34 @@ static inline void free_page_mlock(struct page *page) { } #endif /* CONFIG_UNEVICTABLE_LRU */ +/* + * Return the mem_map entry representing the 'offset' subpage within + * the maximally aligned gigantic page 'base'. Handle any discontiguity + * in the mem_map at MAX_ORDER_NR_PAGES boundaries. + */ +static inline struct page *mem_map_offset(struct page *base, int offset) +{ + if (unlikely(offset >= MAX_ORDER_NR_PAGES)) + return pfn_to_page(page_to_pfn(base) + offset); + return base + offset; +} + +/* + * Iterator over all subpages withing the maximally aligned gigantic + * page 'base'. Handle any discontiguity in the mem_map. + */ +static inline struct page *mem_map_next(struct page *iter, + struct page *base, int offset) +{ + if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) { + unsigned long pfn = page_to_pfn(base) + offset; + if (!pfn_valid(pfn)) + return NULL; + return pfn_to_page(pfn); + } + return iter + 1; +} + /* * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node, * so all functions starting at paging_init should be marked __init From 18229df5b613ed0732a766fc37850de2e7988e43 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 6 Nov 2008 12:53:27 -0800 Subject: [PATCH 132/173] hugetlb: pull gigantic page initialisation out of the default path As we can determine exactly when a gigantic page is in use we can optimise the common regular page cases by pulling out gigantic page initialisation into its own function. As gigantic pages are never released to buddy we do not need a destructor. This effectivly reverts the previous change to the main buddy allocator. It also adds a paranoid check to ensure we never release gigantic pages from hugetlbfs to the main buddy. Signed-off-by: Andy Whitcroft Cc: Jon Tollefson Cc: Mel Gorman Cc: Nick Piggin Cc: Christoph Lameter Cc: [2.6.27.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 12 +++++++++++- mm/internal.h | 1 + mm/page_alloc.c | 30 ++++++++++++++++++++++-------- 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e6afe527bd09..d143ab67be44 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -491,6 +491,8 @@ static void update_and_free_page(struct hstate *h, struct page *page) { int i; + VM_BUG_ON(h->order >= MAX_ORDER); + h->nr_huge_pages--; h->nr_huge_pages_node[page_to_nid(page)]--; for (i = 0; i < pages_per_huge_page(h); i++) { @@ -1005,6 +1007,14 @@ __attribute__((weak)) int alloc_bootmem_huge_page(struct hstate *h) return 1; } +static void prep_compound_huge_page(struct page *page, int order) +{ + if (unlikely(order > (MAX_ORDER - 1))) + prep_compound_gigantic_page(page, order); + else + prep_compound_page(page, order); +} + /* Put bootmem huge pages into the standard lists after mem_map is up */ static void __init gather_bootmem_prealloc(void) { @@ -1015,7 +1025,7 @@ static void __init gather_bootmem_prealloc(void) struct hstate *h = m->hstate; __ClearPageReserved(page); WARN_ON(page_count(page) != 1); - prep_compound_page(page, h->order); + prep_compound_huge_page(page, h->order); prep_new_huge_page(h, page, page_to_nid(page)); } } diff --git a/mm/internal.h b/mm/internal.h index f482460de3e6..13333bc2eb68 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -17,6 +17,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling); extern void prep_compound_page(struct page *page, unsigned long order); +extern void prep_compound_gigantic_page(struct page *page, unsigned long order); static inline void set_page_count(struct page *page, int v) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d0a240fbb8bf..54069e64e3a8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -263,34 +263,48 @@ void prep_compound_page(struct page *page, unsigned long order) { int i; int nr_pages = 1 << order; - struct page *p = page + 1; set_compound_page_dtor(page, free_compound_page); set_compound_order(page, order); __SetPageHead(page); - for (i = 1; i < nr_pages; i++, p++) { - if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0)) - p = pfn_to_page(page_to_pfn(page) + i); + for (i = 1; i < nr_pages; i++) { + struct page *p = page + i; + __SetPageTail(p); p->first_page = page; } } -static void destroy_compound_page(struct page *page, unsigned long order) +#ifdef CONFIG_HUGETLBFS +void prep_compound_gigantic_page(struct page *page, unsigned long order) { int i; int nr_pages = 1 << order; struct page *p = page + 1; + set_compound_page_dtor(page, free_compound_page); + set_compound_order(page, order); + __SetPageHead(page); + for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { + __SetPageTail(p); + p->first_page = page; + } +} +#endif + +static void destroy_compound_page(struct page *page, unsigned long order) +{ + int i; + int nr_pages = 1 << order; + if (unlikely(compound_order(page) != order)) bad_page(page); if (unlikely(!PageHead(page))) bad_page(page); __ClearPageHead(page); - for (i = 1; i < nr_pages; i++, p++) { - if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0)) - p = pfn_to_page(page_to_pfn(page) + i); + for (i = 1; i < nr_pages; i++) { + struct page *p = page + i; if (unlikely(!PageTail(p) | (p->first_page != page))) From 953a64798d82ee5467da2bdc0e467ef874fbc208 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 6 Nov 2008 12:53:28 -0800 Subject: [PATCH 133/173] MAINTAINERS: make IOAT easier to find Searching MAINTAINERS for "ioat" comes up empty. Fix this. Cc: "Dan Williams" Cc: "Sosnowski, Maciej" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d643e862b8e4..7e6a17e1de09 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -721,7 +721,7 @@ W: http://sourceforge.net/projects/acpi4asus W: http://xf.iksaif.net/acpi4asus S: Maintained -ASYNCHRONOUS TRANSFERS/TRANSFORMS API +ASYNCHRONOUS TRANSFERS/TRANSFORMS (IOAT) API P: Dan Williams M: dan.j.williams@intel.com P: Maciej Sosnowski From b4416d2bea007f07f2e74cdc4cb64042ec996c83 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 6 Nov 2008 12:53:29 -0800 Subject: [PATCH 134/173] oom: do not dump task state for non thread group leaders When /proc/sys/vm/oom_dump_tasks is enabled, it's only necessary to dump task state information for thread group leaders. The kernel log gets quickly overwhelmed on machines with a massive number of threads by dumping non-thread group leaders. Reviewed-by: Christoph Lameter Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 64e5b4bcd964..2846a58e5de9 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -295,6 +295,8 @@ static void dump_tasks(const struct mem_cgroup *mem) continue; if (mem && !task_in_mem_cgroup(p, mem)) continue; + if (!thread_group_leader(p)) + continue; task_lock(p); printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", From 17a1217e12d8c8434f8a3deef7bf980c724a6ac7 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Thu, 6 Nov 2008 12:53:29 -0800 Subject: [PATCH 135/173] fbdev: add new framebuffer driver for Fujitsu MB862xx GDCs Add a framebuffer driver for the Fujitsu Carmine/Coral-P(A)/Lime graphics controllers. Lime GDC support is known to work on PPC440EPx based lwmon5 and MPC8544E based socrates embedded boards, both equipped with Lime GDC. Carmine/Coral-P PCI GDC support is known to work on PPC440EPx based Sequoia board and also on x86 platform. Signed-off-by: Anatolij Gustschin Cc: Dmitry Baryshkov Cc: Anton Vorontsov Cc: Matteo Fortini Cc: Krzysztof Helt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/Kconfig | 32 + drivers/video/Makefile | 1 + drivers/video/mb862xx/Makefile | 5 + drivers/video/mb862xx/mb862xx_reg.h | 138 ++++ drivers/video/mb862xx/mb862xxfb.c | 1061 +++++++++++++++++++++++++++ drivers/video/mb862xx/mb862xxfb.h | 83 +++ 6 files changed, 1320 insertions(+) create mode 100644 drivers/video/mb862xx/Makefile create mode 100644 drivers/video/mb862xx/mb862xx_reg.h create mode 100644 drivers/video/mb862xx/mb862xxfb.c create mode 100644 drivers/video/mb862xx/mb862xxfb.h diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 0f13448c6f79..3f3ce13fef43 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -2083,6 +2083,38 @@ config FB_METRONOME controller. The pre-release name for this device was 8track and could also have been called by some vendors as PVI-nnnn. +config FB_MB862XX + tristate "Fujitsu MB862xx GDC support" + depends on FB + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + ---help--- + Frame buffer driver for Fujitsu Carmine/Coral-P(A)/Lime controllers. + +config FB_MB862XX_PCI_GDC + bool "Carmine/Coral-P(A) GDC" + depends on PCI && FB_MB862XX + ---help--- + This enables framebuffer support for Fujitsu Carmine/Coral-P(A) + PCI graphics controller devices. + +config FB_MB862XX_LIME + bool "Lime GDC" + depends on FB_MB862XX + depends on OF && !FB_MB862XX_PCI_GDC + select FB_FOREIGN_ENDIAN + select FB_LITTLE_ENDIAN + ---help--- + Framebuffer support for Fujitsu Lime GDC on host CPU bus. + +config FB_PRE_INIT_FB + bool "Don't reinitialize, use bootloader's GDC/Display configuration" + depends on FB_MB862XX_LIME + ---help--- + Select this option if display contents should be inherited as set by + the bootloader. + source "drivers/video/omap/Kconfig" source "drivers/video/backlight/Kconfig" diff --git a/drivers/video/Makefile b/drivers/video/Makefile index 248bddc8d0b0..e39e33e797da 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -122,6 +122,7 @@ obj-$(CONFIG_FB_SH_MOBILE_LCDC) += sh_mobile_lcdcfb.o obj-$(CONFIG_FB_OMAP) += omap/ obj-$(CONFIG_XEN_FBDEV_FRONTEND) += xen-fbfront.o obj-$(CONFIG_FB_CARMINE) += carminefb.o +obj-$(CONFIG_FB_MB862XX) += mb862xx/ # Platform or fallback drivers go here obj-$(CONFIG_FB_UVESA) += uvesafb.o diff --git a/drivers/video/mb862xx/Makefile b/drivers/video/mb862xx/Makefile new file mode 100644 index 000000000000..07664814bb1d --- /dev/null +++ b/drivers/video/mb862xx/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the MB862xx framebuffer driver +# + +obj-$(CONFIG_FB_MB862XX) := mb862xxfb.o diff --git a/drivers/video/mb862xx/mb862xx_reg.h b/drivers/video/mb862xx/mb862xx_reg.h new file mode 100644 index 000000000000..2ba65e118500 --- /dev/null +++ b/drivers/video/mb862xx/mb862xx_reg.h @@ -0,0 +1,138 @@ +/* + * Fujitsu MB862xx Graphics Controller Registers/Bits + */ + +#ifndef _MB862XX_REG_H +#define _MB862XX_REG_H + +#ifdef MB862XX_MMIO_BOTTOM +#define MB862XX_MMIO_BASE 0x03fc0000 +#else +#define MB862XX_MMIO_BASE 0x01fc0000 +#endif +#define MB862XX_I2C_BASE 0x0000c000 +#define MB862XX_DISP_BASE 0x00010000 +#define MB862XX_CAP_BASE 0x00018000 +#define MB862XX_DRAW_BASE 0x00030000 +#define MB862XX_GEO_BASE 0x00038000 +#define MB862XX_PIO_BASE 0x00038000 +#define MB862XX_MMIO_SIZE 0x40000 + +/* Host interface/pio registers */ +#define GC_IST 0x00000020 +#define GC_IMASK 0x00000024 +#define GC_SRST 0x0000002c +#define GC_CCF 0x00000038 +#define GC_CID 0x000000f0 +#define GC_REVISION 0x00000084 + +#define GC_CCF_CGE_100 0x00000000 +#define GC_CCF_CGE_133 0x00040000 +#define GC_CCF_CGE_166 0x00080000 +#define GC_CCF_COT_100 0x00000000 +#define GC_CCF_COT_133 0x00010000 +#define GC_CID_CNAME_MSK 0x0000ff00 +#define GC_CID_VERSION_MSK 0x000000ff + +/* define enabled interrupts hereby */ +#define GC_INT_EN 0x00000000 + +/* Memory interface mode register */ +#define GC_MMR 0x0000fffc + +/* Display Controller registers */ +#define GC_DCM0 0x00000000 +#define GC_HTP 0x00000004 +#define GC_HDB_HDP 0x00000008 +#define GC_VSW_HSW_HSP 0x0000000c +#define GC_VTR 0x00000010 +#define GC_VDP_VSP 0x00000014 +#define GC_WY_WX 0x00000018 +#define GC_WH_WW 0x0000001c +#define GC_L0M 0x00000020 +#define GC_L0OA0 0x00000024 +#define GC_L0DA0 0x00000028 +#define GC_L0DY_L0DX 0x0000002c +#define GC_DCM1 0x00000100 +#define GC_L0EM 0x00000110 +#define GC_L0WY_L0WX 0x00000114 +#define GC_L0WH_L0WW 0x00000118 +#define GC_DCM2 0x00000104 +#define GC_DCM3 0x00000108 +#define GC_CPM_CUTC 0x000000a0 +#define GC_CUOA0 0x000000a4 +#define GC_CUY0_CUX0 0x000000a8 +#define GC_CUOA1 0x000000ac +#define GC_CUY1_CUX1 0x000000b0 +#define GC_L0PAL0 0x00000400 + +#define GC_CPM_CEN0 0x00100000 +#define GC_CPM_CEN1 0x00200000 + +#define GC_DCM01_ESY 0x00000004 +#define GC_DCM01_SC 0x00003f00 +#define GC_DCM01_RESV 0x00004000 +#define GC_DCM01_CKS 0x00008000 +#define GC_DCM01_L0E 0x00010000 +#define GC_DCM01_DEN 0x80000000 +#define GC_L0M_L0C_8 0x00000000 +#define GC_L0M_L0C_16 0x80000000 +#define GC_L0EM_L0EC_24 0x40000000 +#define GC_L0M_L0W_UNIT 64 + +#define GC_DISP_REFCLK_400 400 + +/* Carmine specific */ +#define MB86297_DRAW_BASE 0x00020000 +#define MB86297_DISP0_BASE 0x00100000 +#define MB86297_DISP1_BASE 0x00140000 +#define MB86297_WRBACK_BASE 0x00180000 +#define MB86297_CAP0_BASE 0x00200000 +#define MB86297_CAP1_BASE 0x00280000 +#define MB86297_DRAMCTRL_BASE 0x00300000 +#define MB86297_CTRL_BASE 0x00400000 +#define MB86297_I2C_BASE 0x00500000 + +#define GC_CTRL_STATUS 0x00000000 +#define GC_CTRL_INT_MASK 0x00000004 +#define GC_CTRL_CLK_ENABLE 0x0000000c +#define GC_CTRL_SOFT_RST 0x00000010 + +#define GC_CTRL_CLK_EN_DRAM 0x00000001 +#define GC_CTRL_CLK_EN_2D3D 0x00000002 +#define GC_CTRL_CLK_EN_DISP0 0x00000020 +#define GC_CTRL_CLK_EN_DISP1 0x00000040 + +#define GC_2D3D_REV 0x000004b4 +#define GC_RE_REVISION 0x24240200 + +/* define enabled interrupts hereby */ +#define GC_CARMINE_INT_EN 0x00000004 + +/* DRAM controller */ +#define GC_DCTL_MODE_ADD 0x00000000 +#define GC_DCTL_SETTIME1_EMODE 0x00000004 +#define GC_DCTL_REFRESH_SETTIME2 0x00000008 +#define GC_DCTL_RSV0_STATES 0x0000000C +#define GC_DCTL_RSV2_RSV1 0x00000010 +#define GC_DCTL_DDRIF2_DDRIF1 0x00000014 +#define GC_DCTL_IOCONT1_IOCONT0 0x00000024 + +#define GC_DCTL_STATES_MSK 0x0000000f +#define GC_DCTL_INIT_WAIT_CNT 3000 +#define GC_DCTL_INIT_WAIT_INTERVAL 1 + +/* DRAM ctrl values for Carmine PCI Eval. board */ +#define GC_EVB_DCTL_MODE_ADD 0x012105c3 +#define GC_EVB_DCTL_MODE_ADD_AFT_RST 0x002105c3 +#define GC_EVB_DCTL_SETTIME1_EMODE 0x47498000 +#define GC_EVB_DCTL_REFRESH_SETTIME2 0x00422a22 +#define GC_EVB_DCTL_RSV0_STATES 0x00200003 +#define GC_EVB_DCTL_RSV0_STATES_AFT_RST 0x00200002 +#define GC_EVB_DCTL_RSV2_RSV1 0x0000000f +#define GC_EVB_DCTL_DDRIF2_DDRIF1 0x00556646 +#define GC_EVB_DCTL_IOCONT1_IOCONT0 0x05550555 + +#define GC_DISP_REFCLK_533 533 + +#endif diff --git a/drivers/video/mb862xx/mb862xxfb.c b/drivers/video/mb862xx/mb862xxfb.c new file mode 100644 index 000000000000..38718d95fbb9 --- /dev/null +++ b/drivers/video/mb862xx/mb862xxfb.c @@ -0,0 +1,1061 @@ +/* + * drivers/mb862xx/mb862xxfb.c + * + * Fujitsu Carmine/Coral-P(A)/Lime framebuffer driver + * + * (C) 2008 Anatolij Gustschin + * DENX Software Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#if defined(CONFIG_PPC_OF) +#include +#endif +#include "mb862xxfb.h" +#include "mb862xx_reg.h" + +#define NR_PALETTE 256 +#define MB862XX_MEM_SIZE 0x1000000 +#define CORALP_MEM_SIZE 0x4000000 +#define CARMINE_MEM_SIZE 0x8000000 +#define DRV_NAME "mb862xxfb" + +#if defined(CONFIG_LWMON5) +static struct mb862xx_gc_mode lwmon5_gc_mode = { + /* Mode for Sharp LQ104V1DG61 TFT LCD Panel */ + { "640x480", 60, 640, 480, 40000, 48, 16, 32, 11, 96, 2, 0, 0, 0 }, + /* 16 bits/pixel, 32MB, 100MHz, SDRAM memory mode value */ + 16, 0x2000000, GC_CCF_COT_100, 0x414fb7f2 +}; +#endif + +#if defined(CONFIG_SOCRATES) +static struct mb862xx_gc_mode socrates_gc_mode = { + /* Mode for Prime View PM070WL4 TFT LCD Panel */ + { "800x480", 45, 800, 480, 40000, 86, 42, 33, 10, 128, 2, 0, 0, 0 }, + /* 16 bits/pixel, 16MB, 133MHz, SDRAM memory mode value */ + 16, 0x1000000, GC_CCF_COT_133, 0x4157ba63 +}; +#endif + +/* Helpers */ +static inline int h_total(struct fb_var_screeninfo *var) +{ + return var->xres + var->left_margin + + var->right_margin + var->hsync_len; +} + +static inline int v_total(struct fb_var_screeninfo *var) +{ + return var->yres + var->upper_margin + + var->lower_margin + var->vsync_len; +} + +static inline int hsp(struct fb_var_screeninfo *var) +{ + return var->xres + var->right_margin - 1; +} + +static inline int vsp(struct fb_var_screeninfo *var) +{ + return var->yres + var->lower_margin - 1; +} + +static inline int d_pitch(struct fb_var_screeninfo *var) +{ + return var->xres * var->bits_per_pixel / 8; +} + +static inline unsigned int chan_to_field(unsigned int chan, + struct fb_bitfield *bf) +{ + chan &= 0xffff; + chan >>= 16 - bf->length; + return chan << bf->offset; +} + +static int mb862xxfb_setcolreg(unsigned regno, + unsigned red, unsigned green, unsigned blue, + unsigned transp, struct fb_info *info) +{ + struct mb862xxfb_par *par = info->par; + unsigned int val; + + switch (info->fix.visual) { + case FB_VISUAL_TRUECOLOR: + if (regno < 16) { + val = chan_to_field(red, &info->var.red); + val |= chan_to_field(green, &info->var.green); + val |= chan_to_field(blue, &info->var.blue); + par->pseudo_palette[regno] = val; + } + break; + case FB_VISUAL_PSEUDOCOLOR: + if (regno < 256) { + val = (red >> 8) << 16; + val |= (green >> 8) << 8; + val |= blue >> 8; + outreg(disp, GC_L0PAL0 + (regno * 4), val); + } + break; + default: + return 1; /* unsupported type */ + } + return 0; +} + +static int mb862xxfb_check_var(struct fb_var_screeninfo *var, + struct fb_info *fbi) +{ + unsigned long tmp; + + if (fbi->dev) + dev_dbg(fbi->dev, "%s\n", __func__); + + /* check if these values fit into the registers */ + if (var->hsync_len > 255 || var->vsync_len > 255) + return -EINVAL; + + if ((var->xres + var->right_margin) >= 4096) + return -EINVAL; + + if ((var->yres + var->lower_margin) > 4096) + return -EINVAL; + + if (h_total(var) > 4096 || v_total(var) > 4096) + return -EINVAL; + + if (var->xres_virtual > 4096 || var->yres_virtual > 4096) + return -EINVAL; + + if (var->bits_per_pixel <= 8) + var->bits_per_pixel = 8; + else if (var->bits_per_pixel <= 16) + var->bits_per_pixel = 16; + else if (var->bits_per_pixel <= 32) + var->bits_per_pixel = 32; + + /* + * can cope with 8,16 or 24/32bpp if resulting + * pitch is divisible by 64 without remainder + */ + if (d_pitch(&fbi->var) % GC_L0M_L0W_UNIT) { + int r; + + var->bits_per_pixel = 0; + do { + var->bits_per_pixel += 8; + r = d_pitch(&fbi->var) % GC_L0M_L0W_UNIT; + } while (r && var->bits_per_pixel <= 32); + + if (d_pitch(&fbi->var) % GC_L0M_L0W_UNIT) + return -EINVAL; + } + + /* line length is going to be 128 bit aligned */ + tmp = (var->xres * var->bits_per_pixel) / 8; + if ((tmp & 15) != 0) + return -EINVAL; + + /* set r/g/b positions and validate bpp */ + switch (var->bits_per_pixel) { + case 8: + var->red.length = var->bits_per_pixel; + var->green.length = var->bits_per_pixel; + var->blue.length = var->bits_per_pixel; + var->red.offset = 0; + var->green.offset = 0; + var->blue.offset = 0; + var->transp.length = 0; + break; + case 16: + var->red.length = 5; + var->green.length = 5; + var->blue.length = 5; + var->red.offset = 10; + var->green.offset = 5; + var->blue.offset = 0; + var->transp.length = 0; + break; + case 24: + case 32: + var->transp.length = 8; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.offset = 24; + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + break; + default: + return -EINVAL; + } + return 0; +} + +/* + * set display parameters + */ +static int mb862xxfb_set_par(struct fb_info *fbi) +{ + struct mb862xxfb_par *par = fbi->par; + unsigned long reg, sc; + + dev_dbg(par->dev, "%s\n", __func__); + + if (par->pre_init) + return 0; + + /* disp off */ + reg = inreg(disp, GC_DCM1); + reg &= ~GC_DCM01_DEN; + outreg(disp, GC_DCM1, reg); + + /* set display reference clock div. */ + sc = par->refclk / (1000000 / fbi->var.pixclock) - 1; + reg = inreg(disp, GC_DCM1); + reg &= ~(GC_DCM01_CKS | GC_DCM01_RESV | GC_DCM01_SC); + reg |= sc << 8; + outreg(disp, GC_DCM1, reg); + dev_dbg(par->dev, "SC 0x%lx\n", sc); + + /* disp dimension, format */ + reg = pack(d_pitch(&fbi->var) / GC_L0M_L0W_UNIT, + (fbi->var.yres - 1)); + if (fbi->var.bits_per_pixel == 16) + reg |= GC_L0M_L0C_16; + outreg(disp, GC_L0M, reg); + + if (fbi->var.bits_per_pixel == 32) { + reg = inreg(disp, GC_L0EM); + outreg(disp, GC_L0EM, reg | GC_L0EM_L0EC_24); + } + outreg(disp, GC_WY_WX, 0); + reg = pack(fbi->var.yres - 1, fbi->var.xres); + outreg(disp, GC_WH_WW, reg); + outreg(disp, GC_L0OA0, 0); + outreg(disp, GC_L0DA0, 0); + outreg(disp, GC_L0DY_L0DX, 0); + outreg(disp, GC_L0WY_L0WX, 0); + outreg(disp, GC_L0WH_L0WW, reg); + + /* both HW-cursors off */ + reg = inreg(disp, GC_CPM_CUTC); + reg &= ~(GC_CPM_CEN0 | GC_CPM_CEN1); + outreg(disp, GC_CPM_CUTC, reg); + + /* timings */ + reg = pack(fbi->var.xres - 1, fbi->var.xres - 1); + outreg(disp, GC_HDB_HDP, reg); + reg = pack((fbi->var.yres - 1), vsp(&fbi->var)); + outreg(disp, GC_VDP_VSP, reg); + reg = ((fbi->var.vsync_len - 1) << 24) | + pack((fbi->var.hsync_len - 1), hsp(&fbi->var)); + outreg(disp, GC_VSW_HSW_HSP, reg); + outreg(disp, GC_HTP, pack(h_total(&fbi->var) - 1, 0)); + outreg(disp, GC_VTR, pack(v_total(&fbi->var) - 1, 0)); + + /* display on */ + reg = inreg(disp, GC_DCM1); + reg |= GC_DCM01_DEN | GC_DCM01_L0E; + reg &= ~GC_DCM01_ESY; + outreg(disp, GC_DCM1, reg); + return 0; +} + +static int mb862xxfb_pan(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + struct mb862xxfb_par *par = info->par; + unsigned long reg; + + reg = pack(var->yoffset, var->xoffset); + outreg(disp, GC_L0WY_L0WX, reg); + + reg = pack(var->yres_virtual, var->xres_virtual); + outreg(disp, GC_L0WH_L0WW, reg); + return 0; +} + +static int mb862xxfb_blank(int mode, struct fb_info *fbi) +{ + struct mb862xxfb_par *par = fbi->par; + unsigned long reg; + + dev_dbg(fbi->dev, "blank mode=%d\n", mode); + + switch (mode) { + case FB_BLANK_POWERDOWN: + reg = inreg(disp, GC_DCM1); + reg &= ~GC_DCM01_DEN; + outreg(disp, GC_DCM1, reg); + break; + case FB_BLANK_UNBLANK: + reg = inreg(disp, GC_DCM1); + reg |= GC_DCM01_DEN; + outreg(disp, GC_DCM1, reg); + break; + case FB_BLANK_NORMAL: + case FB_BLANK_VSYNC_SUSPEND: + case FB_BLANK_HSYNC_SUSPEND: + default: + return 1; + } + return 0; +} + +/* framebuffer ops */ +static struct fb_ops mb862xxfb_ops = { + .owner = THIS_MODULE, + .fb_check_var = mb862xxfb_check_var, + .fb_set_par = mb862xxfb_set_par, + .fb_setcolreg = mb862xxfb_setcolreg, + .fb_blank = mb862xxfb_blank, + .fb_pan_display = mb862xxfb_pan, + .fb_fillrect = cfb_fillrect, + .fb_copyarea = cfb_copyarea, + .fb_imageblit = cfb_imageblit, +}; + +/* initialize fb_info data */ +static int mb862xxfb_init_fbinfo(struct fb_info *fbi) +{ + struct mb862xxfb_par *par = fbi->par; + struct mb862xx_gc_mode *mode = par->gc_mode; + unsigned long reg; + + fbi->fbops = &mb862xxfb_ops; + fbi->pseudo_palette = par->pseudo_palette; + fbi->screen_base = par->fb_base; + fbi->screen_size = par->mapped_vram; + + strcpy(fbi->fix.id, DRV_NAME); + fbi->fix.smem_start = (unsigned long)par->fb_base_phys; + fbi->fix.smem_len = par->mapped_vram; + fbi->fix.mmio_start = (unsigned long)par->mmio_base_phys; + fbi->fix.mmio_len = par->mmio_len; + fbi->fix.accel = FB_ACCEL_NONE; + fbi->fix.type = FB_TYPE_PACKED_PIXELS; + fbi->fix.type_aux = 0; + fbi->fix.xpanstep = 1; + fbi->fix.ypanstep = 1; + fbi->fix.ywrapstep = 0; + + reg = inreg(disp, GC_DCM1); + if (reg & GC_DCM01_DEN && reg & GC_DCM01_L0E) { + /* get the disp mode from active display cfg */ + unsigned long sc = ((reg & GC_DCM01_SC) >> 8) + 1; + unsigned long hsp, vsp, ht, vt; + + dev_dbg(par->dev, "using bootloader's disp. mode\n"); + fbi->var.pixclock = (sc * 1000000) / par->refclk; + fbi->var.xres = (inreg(disp, GC_HDB_HDP) & 0x0fff) + 1; + reg = inreg(disp, GC_VDP_VSP); + fbi->var.yres = ((reg >> 16) & 0x0fff) + 1; + vsp = (reg & 0x0fff) + 1; + fbi->var.xres_virtual = fbi->var.xres; + fbi->var.yres_virtual = fbi->var.yres; + reg = inreg(disp, GC_L0EM); + if (reg & GC_L0EM_L0EC_24) { + fbi->var.bits_per_pixel = 32; + } else { + reg = inreg(disp, GC_L0M); + if (reg & GC_L0M_L0C_16) + fbi->var.bits_per_pixel = 16; + else + fbi->var.bits_per_pixel = 8; + } + reg = inreg(disp, GC_VSW_HSW_HSP); + fbi->var.hsync_len = ((reg & 0xff0000) >> 16) + 1; + fbi->var.vsync_len = ((reg & 0x3f000000) >> 24) + 1; + hsp = (reg & 0xffff) + 1; + ht = ((inreg(disp, GC_HTP) & 0xfff0000) >> 16) + 1; + fbi->var.right_margin = hsp - fbi->var.xres; + fbi->var.left_margin = ht - hsp - fbi->var.hsync_len; + vt = ((inreg(disp, GC_VTR) & 0xfff0000) >> 16) + 1; + fbi->var.lower_margin = vsp - fbi->var.yres; + fbi->var.upper_margin = vt - vsp - fbi->var.vsync_len; + } else if (mode) { + dev_dbg(par->dev, "using supplied mode\n"); + fb_videomode_to_var(&fbi->var, (struct fb_videomode *)mode); + fbi->var.bits_per_pixel = mode->def_bpp ? mode->def_bpp : 8; + } else { + int ret; + + ret = fb_find_mode(&fbi->var, fbi, "640x480-16@60", + NULL, 0, NULL, 16); + if (ret == 0 || ret == 4) { + dev_err(par->dev, + "failed to get initial mode\n"); + return -EINVAL; + } + } + + fbi->var.xoffset = 0; + fbi->var.yoffset = 0; + fbi->var.grayscale = 0; + fbi->var.nonstd = 0; + fbi->var.height = -1; + fbi->var.width = -1; + fbi->var.accel_flags = 0; + fbi->var.vmode = FB_VMODE_NONINTERLACED; + fbi->var.activate = FB_ACTIVATE_NOW; + fbi->flags = FBINFO_DEFAULT | +#ifdef __BIG_ENDIAN + FBINFO_FOREIGN_ENDIAN | +#endif + FBINFO_HWACCEL_XPAN | + FBINFO_HWACCEL_YPAN; + + /* check and possibly fix bpp */ + if ((fbi->fbops->fb_check_var)(&fbi->var, fbi)) + dev_err(par->dev, "check_var() failed on initial setup?\n"); + + fbi->fix.visual = fbi->var.bits_per_pixel == 8 ? + FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR; + fbi->fix.line_length = (fbi->var.xres_virtual * + fbi->var.bits_per_pixel) / 8; + return 0; +} + +/* + * show some display controller and cursor registers + */ +static ssize_t mb862xxfb_show_dispregs(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct fb_info *fbi = dev_get_drvdata(dev); + struct mb862xxfb_par *par = fbi->par; + char *ptr = buf; + unsigned int reg; + + for (reg = GC_DCM0; reg <= GC_L0DY_L0DX; reg += 4) + ptr += sprintf(ptr, "%08x = %08x\n", + reg, inreg(disp, reg)); + + for (reg = GC_CPM_CUTC; reg <= GC_CUY1_CUX1; reg += 4) + ptr += sprintf(ptr, "%08x = %08x\n", + reg, inreg(disp, reg)); + + for (reg = GC_DCM1; reg <= GC_L0WH_L0WW; reg += 4) + ptr += sprintf(ptr, "%08x = %08x\n", + reg, inreg(disp, reg)); + + return ptr - buf; +} + +static DEVICE_ATTR(dispregs, 0444, mb862xxfb_show_dispregs, NULL); + +irqreturn_t mb862xx_intr(int irq, void *dev_id) +{ + struct mb862xxfb_par *par = (struct mb862xxfb_par *) dev_id; + unsigned long reg_ist, mask; + + if (!par) + return IRQ_NONE; + + if (par->type == BT_CARMINE) { + /* Get Interrupt Status */ + reg_ist = inreg(ctrl, GC_CTRL_STATUS); + mask = inreg(ctrl, GC_CTRL_INT_MASK); + if (reg_ist == 0) + return IRQ_HANDLED; + + reg_ist &= mask; + if (reg_ist == 0) + return IRQ_HANDLED; + + /* Clear interrupt status */ + outreg(ctrl, 0x0, reg_ist); + } else { + /* Get status */ + reg_ist = inreg(host, GC_IST); + mask = inreg(host, GC_IMASK); + + reg_ist &= mask; + if (reg_ist == 0) + return IRQ_HANDLED; + + /* Clear status */ + outreg(host, GC_IST, ~reg_ist); + } + return IRQ_HANDLED; +} + +#if defined(CONFIG_FB_MB862XX_LIME) +/* + * GDC (Lime, Coral(B/Q), Mint, ...) on host bus + */ +static int mb862xx_gdc_init(struct mb862xxfb_par *par) +{ + unsigned long ccf, mmr; + unsigned long ver, rev; + + if (!par) + return -ENODEV; + +#if defined(CONFIG_FB_PRE_INIT_FB) + par->pre_init = 1; +#endif + par->host = par->mmio_base; + par->i2c = par->mmio_base + MB862XX_I2C_BASE; + par->disp = par->mmio_base + MB862XX_DISP_BASE; + par->cap = par->mmio_base + MB862XX_CAP_BASE; + par->draw = par->mmio_base + MB862XX_DRAW_BASE; + par->geo = par->mmio_base + MB862XX_GEO_BASE; + par->pio = par->mmio_base + MB862XX_PIO_BASE; + + par->refclk = GC_DISP_REFCLK_400; + + ver = inreg(host, GC_CID); + rev = inreg(pio, GC_REVISION); + if ((ver == 0x303) && (rev & 0xffffff00) == 0x20050100) { + dev_info(par->dev, "Fujitsu Lime v1.%d found\n", + (int)rev & 0xff); + par->type = BT_LIME; + ccf = par->gc_mode ? par->gc_mode->ccf : GC_CCF_COT_100; + mmr = par->gc_mode ? par->gc_mode->mmr : 0x414fb7f2; + } else { + dev_info(par->dev, "? GDC, CID/Rev.: 0x%lx/0x%lx \n", ver, rev); + return -ENODEV; + } + + if (!par->pre_init) { + outreg(host, GC_CCF, ccf); + udelay(200); + outreg(host, GC_MMR, mmr); + udelay(10); + } + + /* interrupt status */ + outreg(host, GC_IST, 0); + outreg(host, GC_IMASK, GC_INT_EN); + return 0; +} + +static int __devinit of_platform_mb862xx_probe(struct of_device *ofdev, + const struct of_device_id *id) +{ + struct device_node *np = ofdev->node; + struct device *dev = &ofdev->dev; + struct mb862xxfb_par *par; + struct fb_info *info; + struct resource res; + resource_size_t res_size; + unsigned long ret = -ENODEV; + + if (of_address_to_resource(np, 0, &res)) { + dev_err(dev, "Invalid address\n"); + return -ENXIO; + } + + info = framebuffer_alloc(sizeof(struct mb862xxfb_par), dev); + if (info == NULL) { + dev_err(dev, "cannot allocate framebuffer\n"); + return -ENOMEM; + } + + par = info->par; + par->info = info; + par->dev = dev; + + par->irq = irq_of_parse_and_map(np, 0); + if (par->irq == NO_IRQ) { + dev_err(dev, "failed to map irq\n"); + ret = -ENODEV; + goto fbrel; + } + + res_size = 1 + res.end - res.start; + par->res = request_mem_region(res.start, res_size, DRV_NAME); + if (par->res == NULL) { + dev_err(dev, "Cannot claim framebuffer/mmio\n"); + ret = -ENXIO; + goto irqdisp; + } + +#if defined(CONFIG_LWMON5) + par->gc_mode = &lwmon5_gc_mode; +#endif + +#if defined(CONFIG_SOCRATES) + par->gc_mode = &socrates_gc_mode; +#endif + + par->fb_base_phys = res.start; + par->mmio_base_phys = res.start + MB862XX_MMIO_BASE; + par->mmio_len = MB862XX_MMIO_SIZE; + if (par->gc_mode) + par->mapped_vram = par->gc_mode->max_vram; + else + par->mapped_vram = MB862XX_MEM_SIZE; + + par->fb_base = ioremap(par->fb_base_phys, par->mapped_vram); + if (par->fb_base == NULL) { + dev_err(dev, "Cannot map framebuffer\n"); + goto rel_reg; + } + + par->mmio_base = ioremap(par->mmio_base_phys, par->mmio_len); + if (par->mmio_base == NULL) { + dev_err(dev, "Cannot map registers\n"); + goto fb_unmap; + } + + dev_dbg(dev, "fb phys 0x%llx 0x%lx\n", + (u64)par->fb_base_phys, (ulong)par->mapped_vram); + dev_dbg(dev, "mmio phys 0x%llx 0x%lx, (irq = %d)\n", + (u64)par->mmio_base_phys, (ulong)par->mmio_len, par->irq); + + if (mb862xx_gdc_init(par)) + goto io_unmap; + + if (request_irq(par->irq, mb862xx_intr, IRQF_DISABLED, + DRV_NAME, (void *)par)) { + dev_err(dev, "Cannot request irq\n"); + goto io_unmap; + } + + mb862xxfb_init_fbinfo(info); + + if (fb_alloc_cmap(&info->cmap, NR_PALETTE, 0) < 0) { + dev_err(dev, "Could not allocate cmap for fb_info.\n"); + goto free_irq; + } + + if ((info->fbops->fb_set_par)(info)) + dev_err(dev, "set_var() failed on initial setup?\n"); + + if (register_framebuffer(info)) { + dev_err(dev, "failed to register framebuffer\n"); + goto rel_cmap; + } + + dev_set_drvdata(dev, info); + + if (device_create_file(dev, &dev_attr_dispregs)) + dev_err(dev, "Can't create sysfs regdump file\n"); + return 0; + +rel_cmap: + fb_dealloc_cmap(&info->cmap); +free_irq: + outreg(host, GC_IMASK, 0); + free_irq(par->irq, (void *)par); +io_unmap: + iounmap(par->mmio_base); +fb_unmap: + iounmap(par->fb_base); +rel_reg: + release_mem_region(res.start, res_size); +irqdisp: + irq_dispose_mapping(par->irq); +fbrel: + dev_set_drvdata(dev, NULL); + framebuffer_release(info); + return ret; +} + +static int __devexit of_platform_mb862xx_remove(struct of_device *ofdev) +{ + struct fb_info *fbi = dev_get_drvdata(&ofdev->dev); + struct mb862xxfb_par *par = fbi->par; + resource_size_t res_size = 1 + par->res->end - par->res->start; + unsigned long reg; + + dev_dbg(fbi->dev, "%s release\n", fbi->fix.id); + + /* display off */ + reg = inreg(disp, GC_DCM1); + reg &= ~(GC_DCM01_DEN | GC_DCM01_L0E); + outreg(disp, GC_DCM1, reg); + + /* disable interrupts */ + outreg(host, GC_IMASK, 0); + + free_irq(par->irq, (void *)par); + irq_dispose_mapping(par->irq); + + device_remove_file(&ofdev->dev, &dev_attr_dispregs); + + unregister_framebuffer(fbi); + fb_dealloc_cmap(&fbi->cmap); + + iounmap(par->mmio_base); + iounmap(par->fb_base); + + dev_set_drvdata(&ofdev->dev, NULL); + release_mem_region(par->res->start, res_size); + framebuffer_release(fbi); + return 0; +} + +/* + * common types + */ +static struct of_device_id __devinitdata of_platform_mb862xx_tbl[] = { + { .compatible = "fujitsu,MB86276", }, + { .compatible = "fujitsu,lime", }, + { .compatible = "fujitsu,MB86277", }, + { .compatible = "fujitsu,mint", }, + { .compatible = "fujitsu,MB86293", }, + { .compatible = "fujitsu,MB86294", }, + { .compatible = "fujitsu,coral", }, + { /* end */ } +}; + +static struct of_platform_driver of_platform_mb862xxfb_driver = { + .owner = THIS_MODULE, + .name = DRV_NAME, + .match_table = of_platform_mb862xx_tbl, + .probe = of_platform_mb862xx_probe, + .remove = __devexit_p(of_platform_mb862xx_remove), +}; +#endif + +#if defined(CONFIG_FB_MB862XX_PCI_GDC) +static int coralp_init(struct mb862xxfb_par *par) +{ + int cn, ver; + + par->host = par->mmio_base; + par->i2c = par->mmio_base + MB862XX_I2C_BASE; + par->disp = par->mmio_base + MB862XX_DISP_BASE; + par->cap = par->mmio_base + MB862XX_CAP_BASE; + par->draw = par->mmio_base + MB862XX_DRAW_BASE; + par->geo = par->mmio_base + MB862XX_GEO_BASE; + par->pio = par->mmio_base + MB862XX_PIO_BASE; + + par->refclk = GC_DISP_REFCLK_400; + + ver = inreg(host, GC_CID); + cn = (ver & GC_CID_CNAME_MSK) >> 8; + ver = ver & GC_CID_VERSION_MSK; + if (cn == 3) { + dev_info(par->dev, "Fujitsu Coral-%s GDC Rev.%d found\n",\ + (ver == 6) ? "P" : (ver == 8) ? "PA" : "?", + par->pdev->revision); + outreg(host, GC_CCF, GC_CCF_CGE_166 | GC_CCF_COT_133); + udelay(200); + outreg(host, GC_MMR, GC_MMR_CORALP_EVB_VAL); + udelay(10); + /* Clear interrupt status */ + outreg(host, GC_IST, 0); + } else { + return -ENODEV; + } + return 0; +} + +static int init_dram_ctrl(struct mb862xxfb_par *par) +{ + unsigned long i = 0; + + /* + * Set io mode first! Spec. says IC may be destroyed + * if not set to SSTL2/LVCMOS before init. + */ + outreg(dram_ctrl, GC_DCTL_IOCONT1_IOCONT0, GC_EVB_DCTL_IOCONT1_IOCONT0); + + /* DRAM init */ + outreg(dram_ctrl, GC_DCTL_MODE_ADD, GC_EVB_DCTL_MODE_ADD); + outreg(dram_ctrl, GC_DCTL_SETTIME1_EMODE, GC_EVB_DCTL_SETTIME1_EMODE); + outreg(dram_ctrl, GC_DCTL_REFRESH_SETTIME2, + GC_EVB_DCTL_REFRESH_SETTIME2); + outreg(dram_ctrl, GC_DCTL_RSV2_RSV1, GC_EVB_DCTL_RSV2_RSV1); + outreg(dram_ctrl, GC_DCTL_DDRIF2_DDRIF1, GC_EVB_DCTL_DDRIF2_DDRIF1); + outreg(dram_ctrl, GC_DCTL_RSV0_STATES, GC_EVB_DCTL_RSV0_STATES); + + /* DLL reset done? */ + while ((inreg(dram_ctrl, GC_DCTL_RSV0_STATES) & GC_DCTL_STATES_MSK)) { + udelay(GC_DCTL_INIT_WAIT_INTERVAL); + if (i++ > GC_DCTL_INIT_WAIT_CNT) { + dev_err(par->dev, "VRAM init failed.\n"); + return -EINVAL; + } + } + outreg(dram_ctrl, GC_DCTL_MODE_ADD, GC_EVB_DCTL_MODE_ADD_AFT_RST); + outreg(dram_ctrl, GC_DCTL_RSV0_STATES, GC_EVB_DCTL_RSV0_STATES_AFT_RST); + return 0; +} + +static int carmine_init(struct mb862xxfb_par *par) +{ + unsigned long reg; + + par->ctrl = par->mmio_base + MB86297_CTRL_BASE; + par->i2c = par->mmio_base + MB86297_I2C_BASE; + par->disp = par->mmio_base + MB86297_DISP0_BASE; + par->disp1 = par->mmio_base + MB86297_DISP1_BASE; + par->cap = par->mmio_base + MB86297_CAP0_BASE; + par->cap1 = par->mmio_base + MB86297_CAP1_BASE; + par->draw = par->mmio_base + MB86297_DRAW_BASE; + par->dram_ctrl = par->mmio_base + MB86297_DRAMCTRL_BASE; + par->wrback = par->mmio_base + MB86297_WRBACK_BASE; + + par->refclk = GC_DISP_REFCLK_533; + + /* warm up */ + reg = GC_CTRL_CLK_EN_DRAM | GC_CTRL_CLK_EN_2D3D | GC_CTRL_CLK_EN_DISP0; + outreg(ctrl, GC_CTRL_CLK_ENABLE, reg); + + /* check for engine module revision */ + if (inreg(draw, GC_2D3D_REV) == GC_RE_REVISION) + dev_info(par->dev, "Fujitsu Carmine GDC Rev.%d found\n", + par->pdev->revision); + else + goto err_init; + + reg &= ~GC_CTRL_CLK_EN_2D3D; + outreg(ctrl, GC_CTRL_CLK_ENABLE, reg); + + /* set up vram */ + if (init_dram_ctrl(par) < 0) + goto err_init; + + outreg(ctrl, GC_CTRL_INT_MASK, 0); + return 0; + +err_init: + outreg(ctrl, GC_CTRL_CLK_ENABLE, 0); + return -EINVAL; +} + +static inline int mb862xx_pci_gdc_init(struct mb862xxfb_par *par) +{ + switch (par->type) { + case BT_CORALP: + return coralp_init(par); + case BT_CARMINE: + return carmine_init(par); + default: + return -ENODEV; + } +} + +#define CHIP_ID(id) \ + { PCI_DEVICE(PCI_VENDOR_ID_FUJITSU_LIMITED, id) } + +static struct pci_device_id mb862xx_pci_tbl[] __devinitdata = { + /* MB86295/MB86296 */ + CHIP_ID(PCI_DEVICE_ID_FUJITSU_CORALP), + CHIP_ID(PCI_DEVICE_ID_FUJITSU_CORALPA), + /* MB86297 */ + CHIP_ID(PCI_DEVICE_ID_FUJITSU_CARMINE), + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, mb862xx_pci_tbl); + +static int __devinit mb862xx_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct mb862xxfb_par *par; + struct fb_info *info; + struct device *dev = &pdev->dev; + int ret; + + ret = pci_enable_device(pdev); + if (ret < 0) { + dev_err(dev, "Cannot enable PCI device\n"); + goto out; + } + + info = framebuffer_alloc(sizeof(struct mb862xxfb_par), dev); + if (!info) { + dev_err(dev, "framebuffer alloc failed\n"); + ret = -ENOMEM; + goto dis_dev; + } + + par = info->par; + par->info = info; + par->dev = dev; + par->pdev = pdev; + par->irq = pdev->irq; + + ret = pci_request_regions(pdev, DRV_NAME); + if (ret < 0) { + dev_err(dev, "Cannot reserve region(s) for PCI device\n"); + goto rel_fb; + } + + switch (pdev->device) { + case PCI_DEVICE_ID_FUJITSU_CORALP: + case PCI_DEVICE_ID_FUJITSU_CORALPA: + par->fb_base_phys = pci_resource_start(par->pdev, 0); + par->mapped_vram = CORALP_MEM_SIZE; + par->mmio_base_phys = par->fb_base_phys + MB862XX_MMIO_BASE; + par->mmio_len = MB862XX_MMIO_SIZE; + par->type = BT_CORALP; + break; + case PCI_DEVICE_ID_FUJITSU_CARMINE: + par->fb_base_phys = pci_resource_start(par->pdev, 2); + par->mmio_base_phys = pci_resource_start(par->pdev, 3); + par->mmio_len = pci_resource_len(par->pdev, 3); + par->mapped_vram = CARMINE_MEM_SIZE; + par->type = BT_CARMINE; + break; + default: + /* should never occur */ + goto rel_reg; + } + + par->fb_base = ioremap(par->fb_base_phys, par->mapped_vram); + if (par->fb_base == NULL) { + dev_err(dev, "Cannot map framebuffer\n"); + goto rel_reg; + } + + par->mmio_base = ioremap(par->mmio_base_phys, par->mmio_len); + if (par->mmio_base == NULL) { + dev_err(dev, "Cannot map registers\n"); + ret = -EIO; + goto fb_unmap; + } + + dev_dbg(dev, "fb phys 0x%llx 0x%lx\n", + (u64)par->fb_base_phys, (ulong)par->mapped_vram); + dev_dbg(dev, "mmio phys 0x%llx 0x%lx\n", + (u64)par->mmio_base_phys, (ulong)par->mmio_len); + + if (mb862xx_pci_gdc_init(par)) + goto io_unmap; + + if (request_irq(par->irq, mb862xx_intr, IRQF_DISABLED | IRQF_SHARED, + DRV_NAME, (void *)par)) { + dev_err(dev, "Cannot request irq\n"); + goto io_unmap; + } + + mb862xxfb_init_fbinfo(info); + + if (fb_alloc_cmap(&info->cmap, NR_PALETTE, 0) < 0) { + dev_err(dev, "Could not allocate cmap for fb_info.\n"); + ret = -ENOMEM; + goto free_irq; + } + + if ((info->fbops->fb_set_par)(info)) + dev_err(dev, "set_var() failed on initial setup?\n"); + + ret = register_framebuffer(info); + if (ret < 0) { + dev_err(dev, "failed to register framebuffer\n"); + goto rel_cmap; + } + + pci_set_drvdata(pdev, info); + + if (device_create_file(dev, &dev_attr_dispregs)) + dev_err(dev, "Can't create sysfs regdump file\n"); + + if (par->type == BT_CARMINE) + outreg(ctrl, GC_CTRL_INT_MASK, GC_CARMINE_INT_EN); + else + outreg(host, GC_IMASK, GC_INT_EN); + + return 0; + +rel_cmap: + fb_dealloc_cmap(&info->cmap); +free_irq: + free_irq(par->irq, (void *)par); +io_unmap: + iounmap(par->mmio_base); +fb_unmap: + iounmap(par->fb_base); +rel_reg: + pci_release_regions(pdev); +rel_fb: + framebuffer_release(info); +dis_dev: + pci_disable_device(pdev); +out: + return ret; +} + +static void __devexit mb862xx_pci_remove(struct pci_dev *pdev) +{ + struct fb_info *fbi = pci_get_drvdata(pdev); + struct mb862xxfb_par *par = fbi->par; + unsigned long reg; + + dev_dbg(fbi->dev, "%s release\n", fbi->fix.id); + + /* display off */ + reg = inreg(disp, GC_DCM1); + reg &= ~(GC_DCM01_DEN | GC_DCM01_L0E); + outreg(disp, GC_DCM1, reg); + + if (par->type == BT_CARMINE) { + outreg(ctrl, GC_CTRL_INT_MASK, 0); + outreg(ctrl, GC_CTRL_CLK_ENABLE, 0); + } else { + outreg(host, GC_IMASK, 0); + } + + device_remove_file(&pdev->dev, &dev_attr_dispregs); + + pci_set_drvdata(pdev, NULL); + unregister_framebuffer(fbi); + fb_dealloc_cmap(&fbi->cmap); + + free_irq(par->irq, (void *)par); + iounmap(par->mmio_base); + iounmap(par->fb_base); + + pci_release_regions(pdev); + framebuffer_release(fbi); + pci_disable_device(pdev); +} + +static struct pci_driver mb862xxfb_pci_driver = { + .name = DRV_NAME, + .id_table = mb862xx_pci_tbl, + .probe = mb862xx_pci_probe, + .remove = __devexit_p(mb862xx_pci_remove), +}; +#endif + +static int __devinit mb862xxfb_init(void) +{ + int ret = -ENODEV; + +#if defined(CONFIG_FB_MB862XX_LIME) + ret = of_register_platform_driver(&of_platform_mb862xxfb_driver); +#endif +#if defined(CONFIG_FB_MB862XX_PCI_GDC) + ret = pci_register_driver(&mb862xxfb_pci_driver); +#endif + return ret; +} + +static void __exit mb862xxfb_exit(void) +{ +#if defined(CONFIG_FB_MB862XX_LIME) + of_unregister_platform_driver(&of_platform_mb862xxfb_driver); +#endif +#if defined(CONFIG_FB_MB862XX_PCI_GDC) + pci_unregister_driver(&mb862xxfb_pci_driver); +#endif +} + +module_init(mb862xxfb_init); +module_exit(mb862xxfb_exit); + +MODULE_DESCRIPTION("Fujitsu MB862xx Framebuffer driver"); +MODULE_AUTHOR("Anatolij Gustschin "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/video/mb862xx/mb862xxfb.h b/drivers/video/mb862xx/mb862xxfb.h new file mode 100644 index 000000000000..c4c8f4dd2217 --- /dev/null +++ b/drivers/video/mb862xx/mb862xxfb.h @@ -0,0 +1,83 @@ +#ifndef __MB862XX_H__ +#define __MB862XX_H__ + +#define PCI_VENDOR_ID_FUJITSU_LIMITED 0x10cf +#define PCI_DEVICE_ID_FUJITSU_CORALP 0x2019 +#define PCI_DEVICE_ID_FUJITSU_CORALPA 0x201e +#define PCI_DEVICE_ID_FUJITSU_CARMINE 0x202b + +#define GC_MMR_CORALP_EVB_VAL 0x11d7fa13 + +enum gdctype { + BT_NONE, + BT_LIME, + BT_MINT, + BT_CORAL, + BT_CORALP, + BT_CARMINE, +}; + +struct mb862xx_gc_mode { + struct fb_videomode def_mode; /* mode of connected display */ + unsigned int def_bpp; /* default depth */ + unsigned long max_vram; /* connected SDRAM size */ + unsigned long ccf; /* gdc clk */ + unsigned long mmr; /* memory mode for SDRAM */ +}; + +/* private data */ +struct mb862xxfb_par { + struct fb_info *info; /* fb info head */ + struct device *dev; + struct pci_dev *pdev; + struct resource *res; /* framebuffer/mmio resource */ + + resource_size_t fb_base_phys; /* fb base, 36-bit PPC440EPx */ + resource_size_t mmio_base_phys; /* io base addr */ + void __iomem *fb_base; /* remapped framebuffer */ + void __iomem *mmio_base; /* remapped registers */ + size_t mapped_vram; /* length of remapped vram */ + size_t mmio_len; /* length of register region */ + + void __iomem *host; /* relocatable reg. bases */ + void __iomem *i2c; + void __iomem *disp; + void __iomem *disp1; + void __iomem *cap; + void __iomem *cap1; + void __iomem *draw; + void __iomem *geo; + void __iomem *pio; + void __iomem *ctrl; + void __iomem *dram_ctrl; + void __iomem *wrback; + + unsigned int irq; + unsigned int type; /* GDC type */ + unsigned int refclk; /* disp. reference clock */ + struct mb862xx_gc_mode *gc_mode; /* GDC mode init data */ + int pre_init; /* don't init display if 1 */ + + u32 pseudo_palette[16]; +}; + +#if defined(CONFIG_FB_MB862XX_LIME) && defined(CONFIG_FB_MB862XX_PCI_GDC) +#error "Select Lime GDC or CoralP/Carmine support, but not both together" +#endif +#if defined(CONFIG_FB_MB862XX_LIME) +#define gdc_read __raw_readl +#define gdc_write __raw_writel +#else +#define gdc_read readl +#define gdc_write writel +#endif + +#define inreg(type, off) \ + gdc_read((par->type + (off))) + +#define outreg(type, off, val) \ + gdc_write((val), (par->type + (off))) + +#define pack(a, b) (((a) << 16) | (b)) + +#endif From 0aedadf91a70a11c4a3e7c7d99b21e5528af8d5d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 6 Nov 2008 12:53:30 -0800 Subject: [PATCH 136/173] mm: move migrate_prep out from under mmap_sem Move the migrate_prep outside the mmap_sem for the following system calls 1. sys_move_pages 2. sys_migrate_pages 3. sys_mbind() It really does not matter when we flush the lru. The system is free to add pages onto the lru even during migration which will make the page migration either skip the page (mbind, migrate_pages) or return a busy state (move_pages). Fixes this lockdep warning (and potential deadlock): Some VM place has mmap_sem -> kevent_wq via lru_add_drain_all() net/core/dev.c::dev_ioctl() has rtnl_lock -> mmap_sem (*) the ioctl has copy_from_user() and it can do page fault. linkwatch_event has kevent_wq -> rtnl_lock Signed-off-by: Christoph Lameter Cc: KOSAKI Motohiro Reported-by: Heiko Carstens Cc: Nick Piggin Cc: Hugh Dickins Cc: Rik van Riel Cc: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 18 +++++++++++------- mm/migrate.c | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 36f42573a335..e9493b1c1117 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -489,12 +489,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, int err; struct vm_area_struct *first, *vma, *prev; - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { - - err = migrate_prep(); - if (err) - return ERR_PTR(err); - } first = find_vma(mm, start); if (!first) @@ -809,9 +803,13 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) { int busy = 0; - int err = 0; + int err; nodemask_t tmp; + err = migrate_prep(); + if (err) + return err; + down_read(&mm->mmap_sem); err = migrate_vmas(mm, from_nodes, to_nodes, flags); @@ -974,6 +972,12 @@ static long do_mbind(unsigned long start, unsigned long len, start, start + len, mode, mode_flags, nmask ? nodes_addr(*nmask)[0] : -1); + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + + err = migrate_prep(); + if (err) + return err; + } down_write(&mm->mmap_sem); vma = check_range(mm, start, end, nmask, flags | MPOL_MF_INVERT, &pagelist); diff --git a/mm/migrate.c b/mm/migrate.c index 6602941bfab0..385db89f0c33 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -841,12 +841,12 @@ static int do_move_page_to_node_array(struct mm_struct *mm, struct page_to_node *pp; LIST_HEAD(pagelist); + migrate_prep(); down_read(&mm->mmap_sem); /* * Build a list of pages to migrate */ - migrate_prep(); for (pp = pm; pp->node != MAX_NUMNODES; pp++) { struct vm_area_struct *vma; struct page *page; From b41ad14c30acf023d09ac064096a4cf41248ce46 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 6 Nov 2008 12:53:31 -0800 Subject: [PATCH 137/173] vmemmap: warn about page_structs with remote distance It's insufficient to simply compare node ids when warning about offnode page_structs since it's possible to still have local affinity. Acked-by: Christoph Lameter Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/sparse-vmemmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index a91b5f8fcaf6..a13ea6401ae7 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -64,7 +64,7 @@ void __meminit vmemmap_verify(pte_t *pte, int node, unsigned long pfn = pte_pfn(*pte); int actual_node = early_pfn_to_nid(pfn); - if (actual_node != node) + if (node_distance(actual_node, node) > LOCAL_DISTANCE) printk(KERN_WARNING "[%lx-%lx] potential offnode " "page_structs\n", start, end - 1); } From 24eb089950ce44603b30a3145a2c8520e2b55bb1 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 6 Nov 2008 12:53:32 -0800 Subject: [PATCH 138/173] cgroups: fix invalid cgrp->dentry before cgroup has been completely removed This fixes an oops when reading /proc/sched_debug. A cgroup won't be removed completely until finishing cgroup_diput(), so we shouldn't invalidate cgrp->dentry in cgroup_rmdir(). Otherwise, when a group is being removed while cgroup_path() gets called, we may trigger NULL dereference BUG. The bug can be reproduced: # cat test.sh #!/bin/sh mount -t cgroup -o cpu xxx /mnt for (( ; ; )) { mkdir /mnt/sub rmdir /mnt/sub } # ./test.sh & # cat /proc/sched_debug BUG: unable to handle kernel NULL pointer dereference at 00000038 IP: [] cgroup_path+0x39/0x90 ... Call Trace: [] ? print_cfs_rq+0x6e/0x75d [] ? sched_debug_show+0x72d/0xc1e ... Signed-off-by: Li Zefan Acked-by: Paul Menage Cc: Peter Zijlstra Cc: Ingo Molnar Cc: [2.6.26.x, 2.6.27.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 35eebd5510c2..358e77564e6f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2497,7 +2497,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) list_del(&cgrp->sibling); spin_lock(&cgrp->dentry->d_lock); d = dget(cgrp->dentry); - cgrp->dentry = NULL; spin_unlock(&d->d_lock); cgroup_d_remove_dir(d); From 1b6bcdbe7eaacde19b5d633b33c8d056e4818de0 Mon Sep 17 00:00:00 2001 From: Tim Hockin Date: Thu, 6 Nov 2008 12:53:33 -0800 Subject: [PATCH 139/173] Documentation/email-clients.txt: add some info about gmail Signed-off-by: Tim Hockin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/email-clients.txt | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Documentation/email-clients.txt b/Documentation/email-clients.txt index 2ebb94d6ed8e..a618efab7b15 100644 --- a/Documentation/email-clients.txt +++ b/Documentation/email-clients.txt @@ -213,4 +213,29 @@ TkRat (GUI) Works. Use "Insert file..." or external editor. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Gmail (Web GUI) + +If you just have to use Gmail to send patches, it CAN be made to work. It +requires a bit of external help, though. + +The first problem is that Gmail converts tabs to spaces. This will +totally break your patches. To prevent this, you have to use a different +editor. There is a firefox extension called "ViewSourceWith" +(https://addons.mozilla.org/en-US/firefox/addon/394) which allows you to +edit any text box in the editor of your choice. Configure it to launch +your favorite editor. When you want to send a patch, use this technique. +Once you have crafted your messsage + patch, save and exit the editor, +which should reload the Gmail edit box. GMAIL WILL PRESERVE THE TABS. +Hoorah. Apparently you can cut-n-paste literal tabs, but Gmail will +convert those to spaces upon sending! + +The second problem is that Gmail converts tabs to spaces on replies. If +you reply to a patch, don't expect to be able to apply it as a patch. + +The last problem is that Gmail will base64-encode any message that has a +non-ASCII character. That includes things like European names. Be aware. + +Gmail is not convenient for lkml patches, but CAN be made to work. + ### From fd96feb2583688ad13d8467ded442f9c8d73cc4b Mon Sep 17 00:00:00 2001 From: dann frazier Date: Thu, 6 Nov 2008 12:53:34 -0800 Subject: [PATCH 140/173] cciss: add P700m to list of supported controllers P700m support was added in: 9cff3b383dad193b0762c27278a16237e10b53dc Update cciss.txt to match. Signed-off-by: dann frazier Acked-by: Mike Miller Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cciss.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/cciss.txt b/Documentation/cciss.txt index 48d80d95f0f8..89698e8df7d4 100644 --- a/Documentation/cciss.txt +++ b/Documentation/cciss.txt @@ -21,6 +21,7 @@ This driver is known to work with the following cards: * SA E200 * SA E200i * SA E500 + * SA P700m * SA P212 * SA P410 * SA P410i From fbdd12676c83df77480f00ebd32fc98fbe3bf836 Mon Sep 17 00:00:00 2001 From: Qinghuang Feng Date: Thu, 6 Nov 2008 12:53:34 -0800 Subject: [PATCH 141/173] mm/oom_kill.c: fix badness() kerneldoc Paramter @mem has been removed since v2.6.26, now delete it's comment. Signed-off-by: Qinghuang Feng Acked-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 2846a58e5de9..a0a01902f551 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -38,7 +38,6 @@ static DEFINE_SPINLOCK(zone_scan_mutex); * badness - calculate a numeric value for how bad this task has been * @p: task struct of which task we should calculate * @uptime: current uptime in seconds - * @mem: target memory controller * * The formula used is relatively simple and documented inline in the * function. The main rationale is that we want to select a good task From c87591b719737b4e91eb1a9fa8fd55a4ff1886d6 Mon Sep 17 00:00:00 2001 From: Arthur Jones Date: Thu, 6 Nov 2008 12:53:35 -0800 Subject: [PATCH 142/173] ext3: wait on all pending commits in ext3_sync_fs In ext3_sync_fs, we only wait for a commit to finish if we started it, but there may be one already in progress which will not be synced. In the case of a data=ordered umount with pending long symlinks which are delayed due to a long list of other I/O on the backing block device, this causes the buffer associated with the long symlinks to not be moved to the inode dirty list in the second phase of fsync_super. Then, before they can be dirtied again, kjournald exits, seeing the UMOUNT flag and the dirty pages are never written to the backing block device, causing long symlink corruption and exposing new or previously freed block data to userspace. This can be reproduced with a script created by Eric Sandeen : #!/bin/bash umount /mnt/test2 mount /dev/sdb4 /mnt/test2 rm -f /mnt/test2/* dd if=/dev/zero of=/mnt/test2/bigfile bs=1M count=512 touch /mnt/test2/thisisveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverylongfilename ln -s /mnt/test2/thisisveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverylongfilename /mnt/test2/link umount /mnt/test2 mount /dev/sdb4 /mnt/test2 ls /mnt/test2/ umount /mnt/test2 To ensure all commits are synced, we flush all journal commits now when sync_fs'ing ext3. Signed-off-by: Arthur Jones Cc: Eric Sandeen Cc: Theodore Ts'o Cc: Cc: [2.6.everything] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/super.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index e5717a4fae67..5dec6d1356c4 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2390,13 +2390,12 @@ static void ext3_write_super (struct super_block * sb) static int ext3_sync_fs(struct super_block *sb, int wait) { - tid_t target; - sb->s_dirt = 0; - if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { - if (wait) - log_wait_commit(EXT3_SB(sb)->s_journal, target); - } + if (wait) + ext3_force_commit(sb); + else + journal_start_commit(EXT3_SB(sb)->s_journal, NULL); + return 0; } From a70dcb969f64e2fa98c24f47854f20bf02ff0092 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Thu, 6 Nov 2008 12:53:36 -0800 Subject: [PATCH 143/173] memory hotplug: fix page_zone() calculation in test_pages_isolated() My last bugfix here (adding zone->lock) introduced a new problem: Using page_zone(pfn_to_page(pfn)) to get the zone after the for() loop is wrong. pfn will then be >= end_pfn, which may be in a different zone or not present at all. This may lead to an addressing exception in page_zone() or spin_lock_irqsave(). Now I use __first_valid_page() again after the loop to find a valid page for page_zone(). Signed-off-by: Gerald Schaefer Acked-by: Nathan Fontenot Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_isolation.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/page_isolation.c b/mm/page_isolation.c index b70a7fec1ff6..5e0ffd967452 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -130,10 +130,11 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn) if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE) break; } - if (pfn < end_pfn) + page = __first_valid_page(start_pfn, end_pfn - start_pfn); + if ((pfn < end_pfn) || !page) return -EBUSY; /* Check all pages are free or Marked as ISOLATED */ - zone = page_zone(pfn_to_page(pfn)); + zone = page_zone(page); spin_lock_irqsave(&zone->lock, flags); ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn); spin_unlock_irqrestore(&zone->lock, flags); From a684e7d33096892093456dd56a582cfc3bfad648 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 6 Nov 2008 12:53:37 -0800 Subject: [PATCH 144/173] fbdev: fix fb_compat_ioctl() deadlocks commit 3e680aae4e53ab54cdbb0c29257dae0cbb158e1c ("fb: convert lock/unlock_kernel() into local fb mutex") introduced several deadlocks in the fb_compat_ioctl() path, as mutex_lock() doesn't allow recursion, unlike lock_kernel(). This broke frame buffer applications on 64-bit systems with a 32-bit userland. commit 120a37470c2831fea49fdebaceb5a7039f700ce6 ("framebuffer compat_ioctl deadlock") fixed one of the deadlocks. This patch fixes the remaining deadlocks: - Revert commit 120a37470c2831fea49fdebaceb5a7039f700ce6, - Extract the core logic of fb_ioctl() into a new function do_fb_ioctl(), - Change all callsites of fb_ioctl() where info->lock is already held to call do_fb_ioctl() instead, - Add sparse annotations to all routines that take info->lock. Signed-off-by: Geert Uytterhoeven Cc: Mikulas Patocka Cc: Krzysztof Helt Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/fbmem.c | 63 ++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 6048b55f2878..1d5ae39cb271 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -1002,13 +1002,9 @@ fb_blank(struct fb_info *info, int blank) return ret; } -static long -fb_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) +static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, + unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; - int fbidx = iminor(inode); - struct fb_info *info; struct fb_ops *fb; struct fb_var_screeninfo var; struct fb_fix_screeninfo fix; @@ -1018,14 +1014,10 @@ fb_ioctl(struct file *file, unsigned int cmd, void __user *argp = (void __user *)arg; long ret = 0; - info = registered_fb[fbidx]; - mutex_lock(&info->lock); fb = info->fbops; - - if (!fb) { - mutex_unlock(&info->lock); + if (!fb) return -ENODEV; - } + switch (cmd) { case FBIOGET_VSCREENINFO: ret = copy_to_user(argp, &info->var, @@ -1126,6 +1118,21 @@ fb_ioctl(struct file *file, unsigned int cmd, else ret = fb->fb_ioctl(info, cmd, arg); } + return ret; +} + +static long fb_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +__acquires(&info->lock) +__releases(&info->lock) +{ + struct inode *inode = file->f_path.dentry->d_inode; + int fbidx = iminor(inode); + struct fb_info *info; + long ret; + + info = registered_fb[fbidx]; + mutex_lock(&info->lock); + ret = do_fb_ioctl(info, cmd, arg); mutex_unlock(&info->lock); return ret; } @@ -1157,8 +1164,8 @@ struct fb_cmap32 { compat_caddr_t transp; }; -static int fb_getput_cmap(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +static int fb_getput_cmap(struct fb_info *info, unsigned int cmd, + unsigned long arg) { struct fb_cmap_user __user *cmap; struct fb_cmap32 __user *cmap32; @@ -1181,7 +1188,7 @@ static int fb_getput_cmap(struct inode *inode, struct file *file, put_user(compat_ptr(data), &cmap->transp)) return -EFAULT; - err = fb_ioctl(file, cmd, (unsigned long) cmap); + err = do_fb_ioctl(info, cmd, (unsigned long) cmap); if (!err) { if (copy_in_user(&cmap32->start, @@ -1223,8 +1230,8 @@ static int do_fscreeninfo_to_user(struct fb_fix_screeninfo *fix, return err; } -static int fb_get_fscreeninfo(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +static int fb_get_fscreeninfo(struct fb_info *info, unsigned int cmd, + unsigned long arg) { mm_segment_t old_fs; struct fb_fix_screeninfo fix; @@ -1235,7 +1242,7 @@ static int fb_get_fscreeninfo(struct inode *inode, struct file *file, old_fs = get_fs(); set_fs(KERNEL_DS); - err = fb_ioctl(file, cmd, (unsigned long) &fix); + err = do_fb_ioctl(info, cmd, (unsigned long) &fix); set_fs(old_fs); if (!err) @@ -1244,8 +1251,10 @@ static int fb_get_fscreeninfo(struct inode *inode, struct file *file, return err; } -static long -fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +static long fb_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +__acquires(&info->lock) +__releases(&info->lock) { struct inode *inode = file->f_path.dentry->d_inode; int fbidx = iminor(inode); @@ -1262,16 +1271,16 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FBIOPUT_CON2FBMAP: arg = (unsigned long) compat_ptr(arg); case FBIOBLANK: - mutex_unlock(&info->lock); - return fb_ioctl(file, cmd, arg); + ret = do_fb_ioctl(info, cmd, arg); + break; case FBIOGET_FSCREENINFO: - ret = fb_get_fscreeninfo(inode, file, cmd, arg); + ret = fb_get_fscreeninfo(info, cmd, arg); break; case FBIOGETCMAP: case FBIOPUTCMAP: - ret = fb_getput_cmap(inode, file, cmd, arg); + ret = fb_getput_cmap(info, cmd, arg); break; default: @@ -1286,6 +1295,8 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static int fb_mmap(struct file *file, struct vm_area_struct * vma) +__acquires(&info->lock) +__releases(&info->lock) { int fbidx = iminor(file->f_path.dentry->d_inode); struct fb_info *info = registered_fb[fbidx]; @@ -1339,6 +1350,8 @@ fb_mmap(struct file *file, struct vm_area_struct * vma) static int fb_open(struct inode *inode, struct file *file) +__acquires(&info->lock) +__releases(&info->lock) { int fbidx = iminor(inode); struct fb_info *info; @@ -1374,6 +1387,8 @@ fb_open(struct inode *inode, struct file *file) static int fb_release(struct inode *inode, struct file *file) +__acquires(&info->lock) +__releases(&info->lock) { struct fb_info * const info = file->private_data; From b225d44e27521290faca2e0f9b1a4a8c74dc510a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 6 Nov 2008 12:53:39 -0800 Subject: [PATCH 145/173] Documentation/kernel-parameters.txt: update 'isolcpus' kernel option cpuset can be used to move a process onto or off an isolated CPU. Signed-off-by: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1bbcaa8982b6..8a8cb0ccc5f4 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -995,13 +995,15 @@ and is between 256 and 4096 characters. It is defined in the file Format: ,..., or - - (must be a positive range in ascending order) + - + (must be a positive range in ascending order) or a mixture ,...,- + This option can be used to specify one or more CPUs to isolate from the general SMP balancing and scheduling - algorithms. The only way to move a process onto or off - an "isolated" CPU is via the CPU affinity syscalls. + algorithms. You can move a process onto or off an + "isolated" CPU via the CPU affinity syscalls or cpuset. begins at 0 and the maximum value is "number of CPUs in system - 1". From 06a7f058761cd232cab42d5c7da82f7255b51d5b Mon Sep 17 00:00:00 2001 From: David Brownell Date: Thu, 6 Nov 2008 12:53:40 -0800 Subject: [PATCH 146/173] atmel_serial: keep clock off when it's not needed The atmel_serial driver is mismanaging its clock by leaving it on at all times ... the whole point of clock management is to leave it off unless it's actively needed, which conserves power!! Although the kernel doesn't actually hang without my fix, it does discard quite a lot of early console output. The result still looks correct: usart users= 1 on 35000000 Hz, for atmel_usart.0 usart users= 0 off 35000000 Hz, for atmel_usart.2 when using ttyS0 as serial console. [haavard.skinnemoen@atmel.com: Make sure clock is enabled early for console] Signed-off-by: David Brownell Signed-off-by: Haavard Skinnemoen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/atmel_serial.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/serial/atmel_serial.c b/drivers/serial/atmel_serial.c index 61fb8b6d19af..d5efd6c77904 100644 --- a/drivers/serial/atmel_serial.c +++ b/drivers/serial/atmel_serial.c @@ -1258,6 +1258,8 @@ static void __devinit atmel_init_port(struct atmel_uart_port *atmel_port, atmel_port->clk = clk_get(&pdev->dev, "usart"); clk_enable(atmel_port->clk); port->uartclk = clk_get_rate(atmel_port->clk); + clk_disable(atmel_port->clk); + /* only enable clock when USART is in use */ } atmel_port->use_dma_rx = data->use_dma_rx; @@ -1379,6 +1381,8 @@ static int __init atmel_console_setup(struct console *co, char *options) return -ENODEV; } + clk_enable(atmel_ports[co->index].clk); + UART_PUT_IDR(port, -1); UART_PUT_CR(port, ATMEL_US_RSTSTA | ATMEL_US_RSTRX); UART_PUT_CR(port, ATMEL_US_TXEN | ATMEL_US_RXEN); @@ -1403,7 +1407,7 @@ static struct console atmel_console = { .data = &atmel_uart, }; -#define ATMEL_CONSOLE_DEVICE &atmel_console +#define ATMEL_CONSOLE_DEVICE (&atmel_console) /* * Early console initialization (before VM subsystem initialized). @@ -1534,6 +1538,15 @@ static int __devinit atmel_serial_probe(struct platform_device *pdev) if (ret) goto err_add_port; + if (atmel_is_console_port(&port->uart) + && ATMEL_CONSOLE_DEVICE->flags & CON_ENABLED) { + /* + * The serial core enabled the clock for us, so undo + * the clk_enable() in atmel_console_setup() + */ + clk_disable(port->clk); + } + device_init_wakeup(&pdev->dev, 1); platform_set_drvdata(pdev, port); @@ -1544,7 +1557,6 @@ static int __devinit atmel_serial_probe(struct platform_device *pdev) port->rx_ring.buf = NULL; err_alloc_ring: if (!atmel_is_console_port(&port->uart)) { - clk_disable(port->clk); clk_put(port->clk); port->clk = NULL; } @@ -1568,7 +1580,6 @@ static int __devexit atmel_serial_remove(struct platform_device *pdev) /* "port" is allocated statically, so we shouldn't free it */ - clk_disable(atmel_port->clk); clk_put(atmel_port->clk); return ret; From 80bb26d4062657c52862d1b112beead47ff9b793 Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Thu, 6 Nov 2008 12:53:41 -0800 Subject: [PATCH 147/173] rtc-cmos: fix boot log message -rtc0: alarms up to one month, y3k, 114 bytes nvram, , hpet irqs irqs +rtc0: alarms up to one month, y3k, 114 bytes nvram, hpet irqs Signed-off-by: Frans Pop Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-cmos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 5549231179a2..6cf8e282338f 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -794,7 +794,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) goto cleanup2; } - pr_info("%s: alarms up to one %s%s, %zd bytes nvram, %s irqs\n", + pr_info("%s: alarms up to one %s%s, %zd bytes nvram%s\n", cmos_rtc.rtc->dev.bus_id, is_valid_irq(rtc_irq) ? (cmos_rtc.mon_alrm From c1dfda399ace020126547e7d454ba94edc8c8797 Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Thu, 6 Nov 2008 12:53:42 -0800 Subject: [PATCH 148/173] SAM9 watchdog: update for moved headers The architecture header files were recently moved from include/asm-arm/mach-at91/ to arch/arm/mach-at91/include/mach/. The SAM9 watchdog driver still includes a header from the old location. Signed-off-by: Andrew Victor Cc: Wim Van Sebroeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/watchdog/at91sam9_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c index b4babfc31586..b1da287f90ec 100644 --- a/drivers/watchdog/at91sam9_wdt.c +++ b/drivers/watchdog/at91sam9_wdt.c @@ -30,7 +30,7 @@ #include #include -#include +#include #define DRV_NAME "AT91SAM9 Watchdog" From 8986ab59631b1f2e82ac820c8fd76a34462915ca Mon Sep 17 00:00:00 2001 From: Bart Trojanowski Date: Thu, 6 Nov 2008 12:53:44 -0800 Subject: [PATCH 149/173] fat: document additional vfat mount options While debugging a sync mount regression on vfat I noticed that there were mount options parsed by the driver that were not documented. [hirofumi@mail.parknet.co.jp: fix some parts] Signed-off-by: Bart Trojanowski Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/vfat.txt | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index bbac4f1d9056..dc9dc73d7d38 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt @@ -8,6 +8,12 @@ if you want to format from within Linux. VFAT MOUNT OPTIONS ---------------------------------------------------------------------- +uid=### -- Set the owner of all files on this filesystem. + The default is the uid of current process. + +gid=### -- Set the group of all files on this filesystem. + The default is the gid of current process. + umask=### -- The permission mask (for files and directories, see umask(1)). The default is the umask of current process. @@ -36,7 +42,7 @@ codepage=### -- Sets the codepage number for converting to shortname characters on FAT filesystem. By default, FAT_DEFAULT_CODEPAGE setting is used. -iocharset=name -- Character set to use for converting between the +iocharset= -- Character set to use for converting between the encoding is used for user visible filename and 16 bit Unicode characters. Long filenames are stored on disk in Unicode format, but Unix for the most part doesn't @@ -86,6 +92,8 @@ check=s|r|n -- Case sensitivity checking setting. r: relaxed, case insensitive n: normal, default setting, currently case insensitive +nocase -- This was deprecated for vfat. Use shortname=win95 instead. + shortname=lower|win95|winnt|mixed -- Shortname display/create setting. lower: convert to lowercase for display, @@ -99,11 +107,23 @@ shortname=lower|win95|winnt|mixed tz=UTC -- Interpret timestamps as UTC rather than local time. This option disables the conversion of timestamps between local time (as used by Windows on FAT) and UTC - (which Linux uses internally). This is particuluarly + (which Linux uses internally). This is particularly useful when mounting devices (like digital cameras) that are set to UTC in order to avoid the pitfalls of local time. +showexec -- If set, the execute permission bits of the file will be + allowed only if the extension part of the name is .EXE, + .COM, or .BAT. Not set by default. + +debug -- Can be set, but unused by the current implementation. + +sys_immutable -- If set, ATTR_SYS attribute on FAT is handled as + IMMUTABLE flag on Linux. Not set by default. + +flush -- If set, the filesystem will try to flush to disk more + early than normal. Not set by default. + : 0,1,yes,no,true,false TODO From 990e194e69009028e029b7d25da68c38241ec4f0 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:45 -0800 Subject: [PATCH 150/173] fat: move fs/vfat/* and fs/msdos/* to fs/fat This just moves those files, but change link order from MSDOS, VFAT to VFAT, MSDOS. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Makefile | 2 -- fs/fat/Makefile | 6 +++++- fs/{msdos/namei.c => fat/namei_msdos.c} | 0 fs/{vfat/namei.c => fat/namei_vfat.c} | 0 fs/msdos/Makefile | 7 ------- fs/vfat/Makefile | 7 ------- 6 files changed, 5 insertions(+), 17 deletions(-) rename fs/{msdos/namei.c => fat/namei_msdos.c} (100%) rename fs/{vfat/namei.c => fat/namei_vfat.c} (100%) delete mode 100644 fs/msdos/Makefile delete mode 100644 fs/vfat/Makefile diff --git a/fs/Makefile b/fs/Makefile index 2168c902d5ca..d9f8afe6f0c4 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -81,8 +81,6 @@ obj-$(CONFIG_HUGETLBFS) += hugetlbfs/ obj-$(CONFIG_CODA_FS) += coda/ obj-$(CONFIG_MINIX_FS) += minix/ obj-$(CONFIG_FAT_FS) += fat/ -obj-$(CONFIG_MSDOS_FS) += msdos/ -obj-$(CONFIG_VFAT_FS) += vfat/ obj-$(CONFIG_BFS_FS) += bfs/ obj-$(CONFIG_ISO9660_FS) += isofs/ obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ diff --git a/fs/fat/Makefile b/fs/fat/Makefile index bfb5f06cf2c8..e06190322c1c 100644 --- a/fs/fat/Makefile +++ b/fs/fat/Makefile @@ -3,5 +3,9 @@ # obj-$(CONFIG_FAT_FS) += fat.o +obj-$(CONFIG_VFAT_FS) += vfat.o +obj-$(CONFIG_MSDOS_FS) += msdos.o -fat-objs := cache.o dir.o fatent.o file.o inode.o misc.o +fat-y := cache.o dir.o fatent.o file.o inode.o misc.o +vfat-y := namei_vfat.o +msdos-y := namei_msdos.o diff --git a/fs/msdos/namei.c b/fs/fat/namei_msdos.c similarity index 100% rename from fs/msdos/namei.c rename to fs/fat/namei_msdos.c diff --git a/fs/vfat/namei.c b/fs/fat/namei_vfat.c similarity index 100% rename from fs/vfat/namei.c rename to fs/fat/namei_vfat.c diff --git a/fs/msdos/Makefile b/fs/msdos/Makefile deleted file mode 100644 index ea67646fcb95..000000000000 --- a/fs/msdos/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the Linux msdos filesystem routines. -# - -obj-$(CONFIG_MSDOS_FS) += msdos.o - -msdos-y := namei.o diff --git a/fs/vfat/Makefile b/fs/vfat/Makefile deleted file mode 100644 index 40f2798a4f08..000000000000 --- a/fs/vfat/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the linux vfat-filesystem routines. -# - -obj-$(CONFIG_VFAT_FS) += vfat.o - -vfat-y := namei.o From 9e975dae2970d22557662761c8505ce9fd165684 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:46 -0800 Subject: [PATCH 151/173] fat: split include/msdos_fs.h This splits __KERNEL__ stuff in include/msdos_fs.h into fs/fat/fat.h. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/cache.c | 2 +- fs/fat/dir.c | 2 +- fs/fat/fat.h | 274 ++++++++++++++++++++++++++++++++++++++ fs/fat/fatent.c | 1 + fs/fat/file.c | 2 +- fs/fat/inode.c | 2 +- fs/fat/misc.c | 2 +- fs/fat/namei_msdos.c | 2 +- fs/fat/namei_vfat.c | 3 +- include/linux/msdos_fs.h | 276 +-------------------------------------- 10 files changed, 284 insertions(+), 282 deletions(-) create mode 100644 fs/fat/fat.h diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 3222f51c41cf..589edde9053c 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -9,8 +9,8 @@ */ #include -#include #include +#include "fat.h" /* this must be > 0. */ #define FAT_MAX_CACHE 8 diff --git a/fs/fat/dir.c b/fs/fat/dir.c index bae1c3292522..08b23ad25f1c 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -16,11 +16,11 @@ #include #include #include -#include #include #include #include #include +#include "fat.h" static inline loff_t fat_make_i_pos(struct super_block *sb, struct buffer_head *bh, diff --git a/fs/fat/fat.h b/fs/fat/fat.h new file mode 100644 index 000000000000..51f1c42ca5e3 --- /dev/null +++ b/fs/fat/fat.h @@ -0,0 +1,274 @@ +#ifndef _FAT_H +#define _FAT_H + +#include +#include +#include +#include +#include +#include + +/* + * vfat shortname flags + */ +#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */ +#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */ +#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */ +#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ +#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ + +struct fat_mount_options { + uid_t fs_uid; + gid_t fs_gid; + unsigned short fs_fmask; + unsigned short fs_dmask; + unsigned short codepage; /* Codepage for shortname conversions */ + char *iocharset; /* Charset used for filename input/display */ + unsigned short shortname; /* flags for shortname display/create rule */ + unsigned char name_check; /* r = relaxed, n = normal, s = strict */ + unsigned short allow_utime;/* permission for setting the [am]time */ + unsigned quiet:1, /* set = fake successful chmods and chowns */ + showexec:1, /* set = only set x bit for com/exe/bat */ + sys_immutable:1, /* set = system files are immutable */ + dotsOK:1, /* set = hidden and system files are named '.filename' */ + isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ + utf8:1, /* Use of UTF-8 character set (Default) */ + unicode_xlate:1, /* create escape sequences for unhandled Unicode */ + numtail:1, /* Does first alias have a numeric '~1' type tail? */ + flush:1, /* write things quickly */ + nocase:1, /* Does this need case conversion? 0=need case conversion*/ + usefree:1, /* Use free_clusters for FAT32 */ + tz_utc:1; /* Filesystem timestamps are in UTC */ +}; + +#define FAT_HASH_BITS 8 +#define FAT_HASH_SIZE (1UL << FAT_HASH_BITS) +#define FAT_HASH_MASK (FAT_HASH_SIZE-1) + +/* + * MS-DOS file system in-core superblock data + */ +struct msdos_sb_info { + unsigned short sec_per_clus; /* sectors/cluster */ + unsigned short cluster_bits; /* log2(cluster_size) */ + unsigned int cluster_size; /* cluster size */ + unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */ + unsigned short fat_start; + unsigned long fat_length; /* FAT start & length (sec.) */ + unsigned long dir_start; + unsigned short dir_entries; /* root dir start & entries */ + unsigned long data_start; /* first data sector */ + unsigned long max_cluster; /* maximum cluster number */ + unsigned long root_cluster; /* first cluster of the root directory */ + unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ + struct mutex fat_lock; + unsigned int prev_free; /* previously allocated cluster number */ + unsigned int free_clusters; /* -1 if undefined */ + unsigned int free_clus_valid; /* is free_clusters valid? */ + struct fat_mount_options options; + struct nls_table *nls_disk; /* Codepage used on disk */ + struct nls_table *nls_io; /* Charset used for input and display */ + const void *dir_ops; /* Opaque; default directory operations */ + int dir_per_block; /* dir entries per block */ + int dir_per_block_bits; /* log2(dir_per_block) */ + + int fatent_shift; + struct fatent_operations *fatent_ops; + + spinlock_t inode_hash_lock; + struct hlist_head inode_hashtable[FAT_HASH_SIZE]; +}; + +#define FAT_CACHE_VALID 0 /* special case for valid cache */ + +/* + * MS-DOS file system inode data in memory + */ +struct msdos_inode_info { + spinlock_t cache_lru_lock; + struct list_head cache_lru; + int nr_caches; + /* for avoiding the race between fat_free() and fat_get_cluster() */ + unsigned int cache_valid_id; + + loff_t mmu_private; + int i_start; /* first cluster or 0 */ + int i_logstart; /* logical first cluster */ + int i_attrs; /* unused attribute bits */ + loff_t i_pos; /* on-disk position of directory entry or 0 */ + struct hlist_node i_fat_hash; /* hash by i_location */ + struct inode vfs_inode; +}; + +struct fat_slot_info { + loff_t i_pos; /* on-disk position of directory entry */ + loff_t slot_off; /* offset for slot or de start */ + int nr_slots; /* number of slots + 1(de) in filename */ + struct msdos_dir_entry *de; + struct buffer_head *bh; +}; + +static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) +{ + return container_of(inode, struct msdos_inode_info, vfs_inode); +} + +/* Return the FAT attribute byte for this inode */ +static inline u8 fat_attr(struct inode *inode) +{ + return ((inode->i_mode & S_IWUGO) ? ATTR_NONE : ATTR_RO) | + (S_ISDIR(inode->i_mode) ? ATTR_DIR : ATTR_NONE) | + MSDOS_I(inode)->i_attrs; +} + +static inline unsigned char fat_checksum(const __u8 *name) +{ + unsigned char s = name[0]; + s = (s<<7) + (s>>1) + name[1]; s = (s<<7) + (s>>1) + name[2]; + s = (s<<7) + (s>>1) + name[3]; s = (s<<7) + (s>>1) + name[4]; + s = (s<<7) + (s>>1) + name[5]; s = (s<<7) + (s>>1) + name[6]; + s = (s<<7) + (s>>1) + name[7]; s = (s<<7) + (s>>1) + name[8]; + s = (s<<7) + (s>>1) + name[9]; s = (s<<7) + (s>>1) + name[10]; + return s; +} + +static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus) +{ + return ((sector_t)clus - FAT_START_ENT) * sbi->sec_per_clus + + sbi->data_start; +} + +static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len) +{ +#ifdef __BIG_ENDIAN + while (len--) { + *dst++ = src[0] | (src[1] << 8); + src += 2; + } +#else + memcpy(dst, src, len * 2); +#endif +} + +static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) +{ +#ifdef __BIG_ENDIAN + while (len--) { + dst[0] = *src & 0x00FF; + dst[1] = (*src & 0xFF00) >> 8; + dst += 2; + src++; + } +#else + memcpy(dst, src, len * 2); +#endif +} + +/* fat/cache.c */ +extern void fat_cache_inval_inode(struct inode *inode); +extern int fat_get_cluster(struct inode *inode, int cluster, + int *fclus, int *dclus); +extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, + unsigned long *mapped_blocks); + +/* fat/dir.c */ +extern const struct file_operations fat_dir_operations; +extern int fat_search_long(struct inode *inode, const unsigned char *name, + int name_len, struct fat_slot_info *sinfo); +extern int fat_dir_empty(struct inode *dir); +extern int fat_subdirs(struct inode *dir); +extern int fat_scan(struct inode *dir, const unsigned char *name, + struct fat_slot_info *sinfo); +extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, + struct msdos_dir_entry **de, loff_t *i_pos); +extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts); +extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots, + struct fat_slot_info *sinfo); +extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo); + +/* fat/fatent.c */ +struct fat_entry { + int entry; + union { + u8 *ent12_p[2]; + __le16 *ent16_p; + __le32 *ent32_p; + } u; + int nr_bhs; + struct buffer_head *bhs[2]; +}; + +static inline void fatent_init(struct fat_entry *fatent) +{ + fatent->nr_bhs = 0; + fatent->entry = 0; + fatent->u.ent32_p = NULL; + fatent->bhs[0] = fatent->bhs[1] = NULL; +} + +static inline void fatent_set_entry(struct fat_entry *fatent, int entry) +{ + fatent->entry = entry; + fatent->u.ent32_p = NULL; +} + +static inline void fatent_brelse(struct fat_entry *fatent) +{ + int i; + fatent->u.ent32_p = NULL; + for (i = 0; i < fatent->nr_bhs; i++) + brelse(fatent->bhs[i]); + fatent->nr_bhs = 0; + fatent->bhs[0] = fatent->bhs[1] = NULL; +} + +extern void fat_ent_access_init(struct super_block *sb); +extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent, + int entry); +extern int fat_ent_write(struct inode *inode, struct fat_entry *fatent, + int new, int wait); +extern int fat_alloc_clusters(struct inode *inode, int *cluster, + int nr_cluster); +extern int fat_free_clusters(struct inode *inode, int cluster); +extern int fat_count_free_clusters(struct super_block *sb); + +/* fat/file.c */ +extern int fat_generic_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); +extern const struct file_operations fat_file_operations; +extern const struct inode_operations fat_file_inode_operations; +extern int fat_setattr(struct dentry * dentry, struct iattr * attr); +extern void fat_truncate(struct inode *inode); +extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat); + +/* fat/inode.c */ +extern void fat_attach(struct inode *inode, loff_t i_pos); +extern void fat_detach(struct inode *inode); +extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos); +extern struct inode *fat_build_inode(struct super_block *sb, + struct msdos_dir_entry *de, loff_t i_pos); +extern int fat_sync_inode(struct inode *inode); +extern int fat_fill_super(struct super_block *sb, void *data, int silent, + const struct inode_operations *fs_dir_inode_ops, int isvfat); + +extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, + struct inode *i2); +/* fat/misc.c */ +extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); +extern void fat_clusters_flush(struct super_block *sb); +extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); +extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc); +extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, + int tz_utc); +extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); + +int fat_cache_init(void); +void fat_cache_destroy(void); + +#endif /* !_FAT_H */ diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index fb98b3d847ed..5b5f49061b7c 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -7,6 +7,7 @@ #include #include #include +#include "fat.h" struct fatent_operations { void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); diff --git a/fs/fat/file.c b/fs/fat/file.c index ddde37025ca6..b21973f266a1 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -10,13 +10,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include "fat.h" int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 2b2eec1283bf..3921de2013a4 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -28,6 +27,7 @@ #include #include #include +#include "fat.h" #ifndef CONFIG_FAT_DEFAULT_IOCHARSET /* if user don't select VFAT, this is undefined. */ diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 79fb98ad36d4..91ad9be18ff9 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -8,8 +8,8 @@ #include #include -#include #include +#include "fat.h" /* * fat_fs_panic reports a severe file system problem and sets the file system diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index e844b9809d27..c0a4d5cd99b2 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -9,8 +9,8 @@ #include #include #include -#include #include +#include "fat.h" /* Characters that are undesirable in an MS-DOS file name */ static unsigned char bad_chars[] = "*?<>|\""; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 155c10b4adbd..facf3bf0211a 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -16,14 +16,13 @@ */ #include - #include -#include #include #include #include #include #include +#include "fat.h" static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) { diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index ba63858056c7..0982fb47a90d 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -167,282 +167,10 @@ struct msdos_dir_slot { }; #ifdef __KERNEL__ - -#include -#include -#include -#include -#include - -/* - * vfat shortname flags - */ -#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */ -#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */ -#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */ -#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ -#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ - -struct fat_mount_options { - uid_t fs_uid; - gid_t fs_gid; - unsigned short fs_fmask; - unsigned short fs_dmask; - unsigned short codepage; /* Codepage for shortname conversions */ - char *iocharset; /* Charset used for filename input/display */ - unsigned short shortname; /* flags for shortname display/create rule */ - unsigned char name_check; /* r = relaxed, n = normal, s = strict */ - unsigned short allow_utime;/* permission for setting the [am]time */ - unsigned quiet:1, /* set = fake successful chmods and chowns */ - showexec:1, /* set = only set x bit for com/exe/bat */ - sys_immutable:1, /* set = system files are immutable */ - dotsOK:1, /* set = hidden and system files are named '.filename' */ - isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ - utf8:1, /* Use of UTF-8 character set (Default) */ - unicode_xlate:1, /* create escape sequences for unhandled Unicode */ - numtail:1, /* Does first alias have a numeric '~1' type tail? */ - flush:1, /* write things quickly */ - nocase:1, /* Does this need case conversion? 0=need case conversion*/ - usefree:1, /* Use free_clusters for FAT32 */ - tz_utc:1; /* Filesystem timestamps are in UTC */ -}; - -#define FAT_HASH_BITS 8 -#define FAT_HASH_SIZE (1UL << FAT_HASH_BITS) -#define FAT_HASH_MASK (FAT_HASH_SIZE-1) - -/* - * MS-DOS file system in-core superblock data - */ -struct msdos_sb_info { - unsigned short sec_per_clus; /* sectors/cluster */ - unsigned short cluster_bits; /* log2(cluster_size) */ - unsigned int cluster_size; /* cluster size */ - unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */ - unsigned short fat_start; - unsigned long fat_length; /* FAT start & length (sec.) */ - unsigned long dir_start; - unsigned short dir_entries; /* root dir start & entries */ - unsigned long data_start; /* first data sector */ - unsigned long max_cluster; /* maximum cluster number */ - unsigned long root_cluster; /* first cluster of the root directory */ - unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ - struct mutex fat_lock; - unsigned int prev_free; /* previously allocated cluster number */ - unsigned int free_clusters; /* -1 if undefined */ - unsigned int free_clus_valid; /* is free_clusters valid? */ - struct fat_mount_options options; - struct nls_table *nls_disk; /* Codepage used on disk */ - struct nls_table *nls_io; /* Charset used for input and display */ - const void *dir_ops; /* Opaque; default directory operations */ - int dir_per_block; /* dir entries per block */ - int dir_per_block_bits; /* log2(dir_per_block) */ - - int fatent_shift; - struct fatent_operations *fatent_ops; - - spinlock_t inode_hash_lock; - struct hlist_head inode_hashtable[FAT_HASH_SIZE]; -}; - -#define FAT_CACHE_VALID 0 /* special case for valid cache */ - -/* - * MS-DOS file system inode data in memory - */ -struct msdos_inode_info { - spinlock_t cache_lru_lock; - struct list_head cache_lru; - int nr_caches; - /* for avoiding the race between fat_free() and fat_get_cluster() */ - unsigned int cache_valid_id; - - loff_t mmu_private; - int i_start; /* first cluster or 0 */ - int i_logstart; /* logical first cluster */ - int i_attrs; /* unused attribute bits */ - loff_t i_pos; /* on-disk position of directory entry or 0 */ - struct hlist_node i_fat_hash; /* hash by i_location */ - struct inode vfs_inode; -}; - -struct fat_slot_info { - loff_t i_pos; /* on-disk position of directory entry */ - loff_t slot_off; /* offset for slot or de start */ - int nr_slots; /* number of slots + 1(de) in filename */ - struct msdos_dir_entry *de; - struct buffer_head *bh; -}; - -static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) -{ - return sb->s_fs_info; -} - -static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) -{ - return container_of(inode, struct msdos_inode_info, vfs_inode); -} - -/* Return the FAT attribute byte for this inode */ -static inline u8 fat_attr(struct inode *inode) -{ - return ((inode->i_mode & S_IWUGO) ? ATTR_NONE : ATTR_RO) | - (S_ISDIR(inode->i_mode) ? ATTR_DIR : ATTR_NONE) | - MSDOS_I(inode)->i_attrs; -} - -static inline unsigned char fat_checksum(const __u8 *name) -{ - unsigned char s = name[0]; - s = (s<<7) + (s>>1) + name[1]; s = (s<<7) + (s>>1) + name[2]; - s = (s<<7) + (s>>1) + name[3]; s = (s<<7) + (s>>1) + name[4]; - s = (s<<7) + (s>>1) + name[5]; s = (s<<7) + (s>>1) + name[6]; - s = (s<<7) + (s>>1) + name[7]; s = (s<<7) + (s>>1) + name[8]; - s = (s<<7) + (s>>1) + name[9]; s = (s<<7) + (s>>1) + name[10]; - return s; -} - -static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus) -{ - return ((sector_t)clus - FAT_START_ENT) * sbi->sec_per_clus - + sbi->data_start; -} - -static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len) -{ -#ifdef __BIG_ENDIAN - while (len--) { - *dst++ = src[0] | (src[1] << 8); - src += 2; - } -#else - memcpy(dst, src, len * 2); -#endif -} - -static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) -{ -#ifdef __BIG_ENDIAN - while (len--) { - dst[0] = *src & 0x00FF; - dst[1] = (*src & 0xFF00) >> 8; - dst += 2; - src++; - } -#else - memcpy(dst, src, len * 2); -#endif -} - /* media of boot sector */ static inline int fat_valid_media(u8 media) { return 0xf8 <= media || media == 0xf0; } - -/* fat/cache.c */ -extern void fat_cache_inval_inode(struct inode *inode); -extern int fat_get_cluster(struct inode *inode, int cluster, - int *fclus, int *dclus); -extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, - unsigned long *mapped_blocks); - -/* fat/dir.c */ -extern const struct file_operations fat_dir_operations; -extern int fat_search_long(struct inode *inode, const unsigned char *name, - int name_len, struct fat_slot_info *sinfo); -extern int fat_dir_empty(struct inode *dir); -extern int fat_subdirs(struct inode *dir); -extern int fat_scan(struct inode *dir, const unsigned char *name, - struct fat_slot_info *sinfo); -extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, - struct msdos_dir_entry **de, loff_t *i_pos); -extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts); -extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots, - struct fat_slot_info *sinfo); -extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo); - -/* fat/fatent.c */ -struct fat_entry { - int entry; - union { - u8 *ent12_p[2]; - __le16 *ent16_p; - __le32 *ent32_p; - } u; - int nr_bhs; - struct buffer_head *bhs[2]; -}; - -static inline void fatent_init(struct fat_entry *fatent) -{ - fatent->nr_bhs = 0; - fatent->entry = 0; - fatent->u.ent32_p = NULL; - fatent->bhs[0] = fatent->bhs[1] = NULL; -} - -static inline void fatent_set_entry(struct fat_entry *fatent, int entry) -{ - fatent->entry = entry; - fatent->u.ent32_p = NULL; -} - -static inline void fatent_brelse(struct fat_entry *fatent) -{ - int i; - fatent->u.ent32_p = NULL; - for (i = 0; i < fatent->nr_bhs; i++) - brelse(fatent->bhs[i]); - fatent->nr_bhs = 0; - fatent->bhs[0] = fatent->bhs[1] = NULL; -} - -extern void fat_ent_access_init(struct super_block *sb); -extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent, - int entry); -extern int fat_ent_write(struct inode *inode, struct fat_entry *fatent, - int new, int wait); -extern int fat_alloc_clusters(struct inode *inode, int *cluster, - int nr_cluster); -extern int fat_free_clusters(struct inode *inode, int cluster); -extern int fat_count_free_clusters(struct super_block *sb); - -/* fat/file.c */ -extern int fat_generic_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); -extern const struct file_operations fat_file_operations; -extern const struct inode_operations fat_file_inode_operations; -extern int fat_setattr(struct dentry * dentry, struct iattr * attr); -extern void fat_truncate(struct inode *inode); -extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); - -/* fat/inode.c */ -extern void fat_attach(struct inode *inode, loff_t i_pos); -extern void fat_detach(struct inode *inode); -extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos); -extern struct inode *fat_build_inode(struct super_block *sb, - struct msdos_dir_entry *de, loff_t i_pos); -extern int fat_sync_inode(struct inode *inode); -extern int fat_fill_super(struct super_block *sb, void *data, int silent, - const struct inode_operations *fs_dir_inode_ops, int isvfat); - -extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, - struct inode *i2); -/* fat/misc.c */ -extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); -extern void fat_clusters_flush(struct super_block *sb); -extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); -extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc); -extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, - int tz_utc); -extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); - -int fat_cache_init(void); -void fat_cache_destroy(void); - -#endif /* __KERNEL__ */ - -#endif +#endif /* !__KERNEL__ */ +#endif /* !_LINUX_MSDOS_FS_H */ From 7decd1cb0305b97243f283fa7f4baf5fe613edeb Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:47 -0800 Subject: [PATCH 152/173] fat: Fix and cleanup timestamp conversion This cleans date_dos2unix()/fat_date_unix2dos() up. New code should be much more readable. And this fixes those old functions. Those doesn't handle 2100 correctly. 2100 isn't leap year, but old one handles it as leap year. Also, with this, centi sec is handled and is fixed. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 6 +- fs/fat/fat.h | 7 ++- fs/fat/inode.c | 34 ++++------ fs/fat/misc.c | 146 +++++++++++++++++++++++++++++++------------ fs/fat/namei_msdos.c | 2 +- fs/fat/namei_vfat.c | 5 +- 6 files changed, 129 insertions(+), 71 deletions(-) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 08b23ad25f1c..a601c6d45bc0 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -1089,6 +1089,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) struct msdos_dir_entry *de; sector_t blknr; __le16 date, time; + u8 time_cs; int err, cluster; err = fat_alloc_clusters(dir, &cluster, 1); @@ -1102,7 +1103,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) goto error_free; } - fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); + fat_time_unix2fat(sbi, ts, &time, &date, &time_cs); de = (struct msdos_dir_entry *)bhs[0]->b_data; /* filling the new directory slots ("." and ".." entries) */ @@ -1112,13 +1113,14 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) de[0].lcase = de[1].lcase = 0; de[0].time = de[1].time = time; de[0].date = de[1].date = date; - de[0].ctime_cs = de[1].ctime_cs = 0; if (sbi->options.isvfat) { /* extra timestamps */ de[0].ctime = de[1].ctime = time; + de[0].ctime_cs = de[1].ctime_cs = time_cs; de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = date; } else { de[0].ctime = de[1].ctime = 0; + de[0].ctime_cs = de[1].ctime_cs = 0; de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = 0; } de[0].start = cpu_to_le16(cluster); diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 51f1c42ca5e3..a2a570f81719 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -263,9 +263,10 @@ extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); extern void fat_clusters_flush(struct super_block *sb); extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); -extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc); -extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, - int tz_utc); +extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, + __le16 __time, __le16 __date, u8 time_cs); +extern void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts, + __le16 *time, __le16 *date, u8 *time_cs); extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); int fat_cache_init(void); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 3921de2013a4..079d9d5e0d36 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -381,22 +381,12 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; - inode->i_mtime.tv_sec = - date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date), - sbi->options.tz_utc); - inode->i_mtime.tv_nsec = 0; + + fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0); if (sbi->options.isvfat) { - int secs = de->ctime_cs / 100; - int csecs = de->ctime_cs % 100; - inode->i_ctime.tv_sec = - date_dos2unix(le16_to_cpu(de->ctime), - le16_to_cpu(de->cdate), - sbi->options.tz_utc) + secs; - inode->i_ctime.tv_nsec = csecs * 10000000; - inode->i_atime.tv_sec = - date_dos2unix(0, le16_to_cpu(de->adate), - sbi->options.tz_utc); - inode->i_atime.tv_nsec = 0; + fat_time_fat2unix(sbi, &inode->i_ctime, de->ctime, + de->cdate, de->ctime_cs); + fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0); } else inode->i_ctime = inode->i_atime = inode->i_mtime; @@ -591,16 +581,14 @@ static int fat_write_inode(struct inode *inode, int wait) raw_entry->attr = fat_attr(inode); raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); - fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, - &raw_entry->date, sbi->options.tz_utc); + fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time, + &raw_entry->date, NULL); if (sbi->options.isvfat) { __le16 atime; - fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime, - &raw_entry->cdate, sbi->options.tz_utc); - fat_date_unix2dos(inode->i_atime.tv_sec, &atime, - &raw_entry->adate, sbi->options.tz_utc); - raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 + - inode->i_ctime.tv_nsec / 10000000; + fat_time_unix2fat(sbi, &inode->i_ctime, &raw_entry->ctime, + &raw_entry->cdate, &raw_entry->ctime_cs); + fat_time_unix2fat(sbi, &inode->i_atime, &atime, + &raw_entry->adate, NULL); } spin_unlock(&sbi->inode_hash_lock); mark_buffer_dirty(bh); diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 91ad9be18ff9..a191e79e66a9 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -135,65 +135,131 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) extern struct timezone sys_tz; +/* + * The epoch of FAT timestamp is 1980. + * : bits : value + * date: 0 - 4: day (1 - 31) + * date: 5 - 8: month (1 - 12) + * date: 9 - 15: year (0 - 127) from 1980 + * time: 0 - 4: sec (0 - 29) 2sec counts + * time: 5 - 10: min (0 - 59) + * time: 11 - 15: hour (0 - 23) + */ +#define SECS_PER_MIN 60 +#define SECS_PER_HOUR (60 * 60) +#define SECS_PER_DAY (SECS_PER_HOUR * 24) +#define UNIX_SECS_1980 315532800L +#if BITS_PER_LONG == 64 +#define UNIX_SECS_2108 4354819200L +#endif +/* days between 1.1.70 and 1.1.80 (2 leap days) */ +#define DAYS_DELTA (365 * 10 + 2) +/* 120 (2100 - 1980) isn't leap year */ +#define YEAR_2100 120 +#define IS_LEAP_YEAR(y) (!((y) & 3) && (y) != YEAR_2100) + /* Linear day numbers of the respective 1sts in non-leap years. */ -static int day_n[] = { - /* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */ - 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, 0 +static time_t days_in_year[] = { + /* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */ + 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, }; -/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */ -int date_dos2unix(unsigned short time, unsigned short date, int tz_utc) +/* Convert a FAT time/date pair to a UNIX date (seconds since 1 1 70). */ +void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, + __le16 __time, __le16 __date, u8 time_cs) { - int month, year, secs; + u16 time = le16_to_cpu(__time), date = le16_to_cpu(__date); + time_t second, day, leap_day, month, year; - /* - * first subtract and mask after that... Otherwise, if - * date == 0, bad things happen - */ - month = ((date >> 5) - 1) & 15; - year = date >> 9; - secs = (time & 31)*2+60*((time >> 5) & 63)+(time >> 11)*3600+86400* - ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 && - month < 2 ? 1 : 0)+3653); - /* days since 1.1.70 plus 80's leap day */ - if (!tz_utc) - secs += sys_tz.tz_minuteswest*60; - return secs; + year = date >> 9; + month = max(1, (date >> 5) & 0xf); + day = max(1, date & 0x1f) - 1; + + leap_day = (year + 3) / 4; + if (year > YEAR_2100) /* 2100 isn't leap year */ + leap_day--; + if (IS_LEAP_YEAR(year) && month > 2) + leap_day++; + + second = (time & 0x1f) << 1; + second += ((time >> 5) & 0x3f) * SECS_PER_MIN; + second += (time >> 11) * SECS_PER_HOUR; + second += (year * 365 + leap_day + + days_in_year[month] + day + + DAYS_DELTA) * SECS_PER_DAY; + + if (!sbi->options.tz_utc) + second += sys_tz.tz_minuteswest * SECS_PER_MIN; + + if (time_cs) { + ts->tv_sec = second + (time_cs / 100); + ts->tv_nsec = (time_cs % 100) * 10000000; + } else { + ts->tv_sec = second; + ts->tv_nsec = 0; + } } -/* Convert linear UNIX date to a MS-DOS time/date pair. */ -void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc) +/* Convert linear UNIX date to a FAT time/date pair. */ +void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts, + __le16 *time, __le16 *date, u8 *time_cs) { - int day, year, nl_day, month; + time_t second = ts->tv_sec; + time_t day, leap_day, month, year; - if (!tz_utc) - unix_date -= sys_tz.tz_minuteswest*60; + if (!sbi->options.tz_utc) + second -= sys_tz.tz_minuteswest * SECS_PER_MIN; /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */ - if (unix_date < 315532800) - unix_date = 315532800; + if (second < UNIX_SECS_1980) { + *time = 0; + *date = cpu_to_le16((0 << 9) | (1 << 5) | 1); + if (time_cs) + *time_cs = 0; + return; + } +#if BITS_PER_LONG == 64 + if (second >= UNIX_SECS_2108) { + *time = cpu_to_le16((23 << 11) | (59 << 5) | 29); + *date = cpu_to_le16((127 << 9) | (12 << 5) | 31); + if (time_cs) + *time_cs = 199; + return; + } +#endif - *time = cpu_to_le16((unix_date % 60)/2+(((unix_date/60) % 60) << 5)+ - (((unix_date/3600) % 24) << 11)); - day = unix_date/86400-3652; - year = day/365; - if ((year+3)/4+365*year > day) + day = second / SECS_PER_DAY - DAYS_DELTA; + year = day / 365; + leap_day = (year + 3) / 4; + if (year > YEAR_2100) /* 2100 isn't leap year */ + leap_day--; + if (year * 365 + leap_day > day) year--; - day -= (year+3)/4+365*year; - if (day == 59 && !(year & 3)) { - nl_day = day; + leap_day = (year + 3) / 4; + if (year > YEAR_2100) /* 2100 isn't leap year */ + leap_day--; + day -= year * 365 + leap_day; + + if (IS_LEAP_YEAR(year) && day == days_in_year[3]) { month = 2; } else { - nl_day = (year & 3) || day <= 59 ? day : day-1; - for (month = 0; month < 12; month++) { - if (day_n[month] > nl_day) + if (IS_LEAP_YEAR(year) && day > days_in_year[3]) + day--; + for (month = 1; month < 12; month++) { + if (days_in_year[month + 1] > day) break; } } - *date = cpu_to_le16(nl_day-day_n[month-1]+1+(month << 5)+(year << 9)); -} + day -= days_in_year[month]; -EXPORT_SYMBOL_GPL(fat_date_unix2dos); + *time = cpu_to_le16(((second / SECS_PER_HOUR) % 24) << 11 + | ((second / SECS_PER_MIN) % 60) << 5 + | (second % SECS_PER_MIN) >> 1); + *date = cpu_to_le16((year << 9) | (month << 5) | (day + 1)); + if (time_cs) + *time_cs = (ts->tv_sec & 1) * 100 + ts->tv_nsec / 10000000; +} +EXPORT_SYMBOL_GPL(fat_time_unix2fat); int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) { diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index c0a4d5cd99b2..e92e8158ebaf 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -247,7 +247,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name, if (is_hid) de.attr |= ATTR_HIDDEN; de.lcase = 0; - fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); + fat_time_unix2fat(sbi, ts, &time, &date, NULL); de.cdate = de.adate = 0; de.ctime = 0; de.ctime_cs = 0; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index facf3bf0211a..1536bc3ca0f0 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -568,6 +568,7 @@ static int vfat_build_slots(struct inode *dir, const unsigned char *name, unsigned char msdos_name[MSDOS_NAME]; wchar_t *uname; __le16 time, date; + u8 time_cs; int err, ulen, usize, i; loff_t offset; @@ -620,10 +621,10 @@ static int vfat_build_slots(struct inode *dir, const unsigned char *name, memcpy(de->name, msdos_name, MSDOS_NAME); de->attr = is_dir ? ATTR_DIR : ATTR_ARCH; de->lcase = lcase; - fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); + fat_time_unix2fat(sbi, ts, &time, &date, &time_cs); de->time = de->ctime = time; de->date = de->cdate = de->adate = date; - de->ctime_cs = 0; + de->ctime_cs = time_cs; de->start = cpu_to_le16(cluster); de->starthi = cpu_to_le16(cluster >> 16); de->size = 0; From 53472bc8f810d2fb507593ea03703670506a668d Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:47 -0800 Subject: [PATCH 153/173] fat: use generic_file_llseek() for directory Since fat_dir_ioctl() was already fixed (i.e. called under ->i_mutex), and __fat_readdir() doesn't take BKL anymore. So, BKL for ->llseek() is pointless, and we have to use generic_file_llseek(). Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index a601c6d45bc0..931dd28b5289 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -832,6 +832,7 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd, #endif /* CONFIG_COMPAT */ const struct file_operations fat_dir_operations = { + .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = fat_readdir, .ioctl = fat_dir_ioctl, From 52e9d9f4b32a3bec91feb76c84e37b7dcffe5040 Mon Sep 17 00:00:00 2001 From: Darren Jenkins Date: Thu, 6 Nov 2008 12:53:48 -0800 Subject: [PATCH 154/173] fat: cleanup fat_parse_long() error handling Coverity CID 2332 & 2333 RESOURCE_LEAK In fat_search_long() if fat_parse_long() returns a -ve value we return without first freeing unicode. This patch free's them on this error path. The above was false positive on current tree, but this change is more clean, so apply as cleanup. [hirofumi@mail.parknet.co.jp: fix coding style] Signed-off-by: Darren Jenkins Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 931dd28b5289..140fc39e2307 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -373,9 +373,10 @@ int fat_search_long(struct inode *inode, const unsigned char *name, if (de->attr == ATTR_EXT) { int status = fat_parse_long(inode, &cpos, &bh, &de, &unicode, &nr_slots); - if (status < 0) - return status; - else if (status == PARSE_INVALID) + if (status < 0) { + err = status; + goto end_of_dir; + } else if (status == PARSE_INVALID) continue; else if (status == PARSE_NOT_LONGNAME) goto parse_record; From d3dfa8228f87ab9960ab8b4718013d68e3c25a43 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:49 -0800 Subject: [PATCH 155/173] fat: improve fat_hash() fat_hash() is using the algorithm known as bad. Instead of it, this uses hash_32(). The following is the summary of test. old hash: hash func (1000 times): 33489 cycles total inodes in hash table: 70926 largest bucket contains: 696 smallest bucket contains: 54 new hash: hash func (1000 times): 33129 cycles total inodes in hash table: 70926 largest bucket contains: 315 smallest bucket contains: 236 Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fat.h | 1 - fs/fat/inode.c | 18 +++++++----------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/fat/fat.h b/fs/fat/fat.h index a2a570f81719..2b8e94c3eef4 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -43,7 +43,6 @@ struct fat_mount_options { #define FAT_HASH_BITS 8 #define FAT_HASH_SIZE (1UL << FAT_HASH_BITS) -#define FAT_HASH_MASK (FAT_HASH_SIZE-1) /* * MS-DOS file system in-core superblock data diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 079d9d5e0d36..f58cd48d98b8 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "fat.h" @@ -247,25 +248,21 @@ static void fat_hash_init(struct super_block *sb) INIT_HLIST_HEAD(&sbi->inode_hashtable[i]); } -static inline unsigned long fat_hash(struct super_block *sb, loff_t i_pos) +static inline unsigned long fat_hash(loff_t i_pos) { - unsigned long tmp = (unsigned long)i_pos | (unsigned long) sb; - tmp = tmp + (tmp >> FAT_HASH_BITS) + (tmp >> FAT_HASH_BITS * 2); - return tmp & FAT_HASH_MASK; + return hash_32(i_pos, FAT_HASH_BITS); } void fat_attach(struct inode *inode, loff_t i_pos) { - struct super_block *sb = inode->i_sb; - struct msdos_sb_info *sbi = MSDOS_SB(sb); + struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); + struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos); spin_lock(&sbi->inode_hash_lock); MSDOS_I(inode)->i_pos = i_pos; - hlist_add_head(&MSDOS_I(inode)->i_fat_hash, - sbi->inode_hashtable + fat_hash(sb, i_pos)); + hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head); spin_unlock(&sbi->inode_hash_lock); } - EXPORT_SYMBOL_GPL(fat_attach); void fat_detach(struct inode *inode) @@ -276,13 +273,12 @@ void fat_detach(struct inode *inode) hlist_del_init(&MSDOS_I(inode)->i_fat_hash); spin_unlock(&sbi->inode_hash_lock); } - EXPORT_SYMBOL_GPL(fat_detach); struct inode *fat_iget(struct super_block *sb, loff_t i_pos) { struct msdos_sb_info *sbi = MSDOS_SB(sb); - struct hlist_head *head = sbi->inode_hashtable + fat_hash(sb, i_pos); + struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos); struct hlist_node *_p; struct msdos_inode_info *i; struct inode *inode = NULL; From 5e35dd4651002207948f10c576fc7d9bad448815 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:49 -0800 Subject: [PATCH 156/173] fat: Fix fat_ent_update_ptr() for FAT12 This fixes the missing update for bhs/nr_bhs in case the caller accessed from block boundary to first block of boundary. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fatent.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 5b5f49061b7c..13513992da3c 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -317,10 +317,20 @@ static inline int fat_ent_update_ptr(struct super_block *sb, /* Is this fatent's blocks including this entry? */ if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr) return 0; - /* Does this entry need the next block? */ - if (sbi->fat_bits == 12 && (offset + 1) >= sb->s_blocksize) { - if (fatent->nr_bhs != 2 || bhs[1]->b_blocknr != (blocknr + 1)) - return 0; + if (sbi->fat_bits == 12) { + if ((offset + 1) < sb->s_blocksize) { + /* This entry is on bhs[0]. */ + if (fatent->nr_bhs == 2) { + brelse(bhs[1]); + fatent->nr_bhs = 1; + } + } else { + /* This entry needs the next block. */ + if (fatent->nr_bhs != 2) + return 0; + if (bhs[1]->b_blocknr != (blocknr + 1)) + return 0; + } } ops->ent_set_ptr(fatent, offset); return 1; From a993b542bb4cd3e5a64863b7ef892bbebec2239b Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:50 -0800 Subject: [PATCH 157/173] fat: use fat_detach() in fat_clear_inode() Use fat_detach() instead of opencoding it. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index f58cd48d98b8..8e1b75c63c7f 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -429,13 +429,8 @@ static void fat_delete_inode(struct inode *inode) static void fat_clear_inode(struct inode *inode) { - struct super_block *sb = inode->i_sb; - struct msdos_sb_info *sbi = MSDOS_SB(sb); - - spin_lock(&sbi->inode_hash_lock); fat_cache_inval_inode(inode); - hlist_del_init(&MSDOS_I(inode)->i_fat_hash); - spin_unlock(&sbi->inode_hash_lock); + fat_detach(inode); } static void fat_write_super(struct super_block *sb) From 068f5ae05c51d2cee6b31cb3da06775dd83bd348 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:51 -0800 Subject: [PATCH 158/173] vfat: Fix vfat_find() error path in vfat_lookup() Current vfat_lookup() creates negetive dentry blindly if vfat_find() returned a error. It's wrong. If the error isn't -ENOENT, just return error. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/namei_vfat.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 1536bc3ca0f0..419deabfb9be 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -683,7 +683,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, { struct super_block *sb = dir->i_sb; struct fat_slot_info sinfo; - struct inode *inode = NULL; + struct inode *inode; struct dentry *alias; int err, table; @@ -693,14 +693,18 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, err = vfat_find(dir, &dentry->d_name, &sinfo); if (err) { - table++; + if (err == -ENOENT) { + table++; + inode = NULL; + goto out; + } goto error; } inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); if (IS_ERR(inode)) { - unlock_super(sb); - return ERR_CAST(inode); + err = PTR_ERR(inode); + goto error; } alias = d_find_alias(inode); if (alias) { @@ -713,7 +717,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, } } -error: +out: unlock_super(sb); dentry->d_op = &vfat_dentry_ops[table]; dentry->d_time = dentry->d_parent->d_inode->i_version; @@ -723,6 +727,10 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, dentry->d_time = dentry->d_parent->d_inode->i_version; } return dentry; + +error: + unlock_super(sb); + return ERR_PTR(err); } static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, From 1b52467243c7167b3a267ddbcbb14d550f28eb4a Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:51 -0800 Subject: [PATCH 159/173] fat: Fix/Cleanup dcache handling for vfat - Add comments for handling dcache of vfat. - Separate case-sensitive case and case-insensitive to vfat_revalidate() and vfat_ci_revalidate(). vfat_revalidate() doesn't need to drop case-insensitive negative dentry on creation path. - Current code is missing to set ->d_revalidate to the negative dentry created by unlink/etc.. This sets ->d_revalidate always, and returns 1 for positive dentry. Now, we don't need to change ->d_op dynamically anymore, so this just uses sb->s_root->d_op to set ->d_op. - d_find_alias() may return DCACHE_DISCONNECTED dentry. It's not the interesting dentry there. This checks it. - Add missing LOOKUP_PARENT check. We don't need to drop the valid negative dentry for (LOOKUP_CREATE | LOOKUP_PARENT) lookup. - For consistent filename on creation path, this drops negative dentry if we can't see intent. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/namei_vfat.c | 124 ++++++++++++++++++++++++++++---------------- 1 file changed, 80 insertions(+), 44 deletions(-) diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 419deabfb9be..d585398f9f6b 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -24,27 +24,67 @@ #include #include "fat.h" -static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) +/* + * If new entry was created in the parent, it could create the 8.3 + * alias (the shortname of logname). So, the parent may have the + * negative-dentry which matches the created 8.3 alias. + * + * If it happened, the negative dentry isn't actually negative + * anymore. So, drop it. + */ +static int vfat_revalidate_shortname(struct dentry *dentry) { int ret = 1; - - if (!dentry->d_inode && - nd && !(nd->flags & LOOKUP_CONTINUE) && (nd->flags & LOOKUP_CREATE)) - /* - * negative dentry is dropped, in order to make sure - * to use the name which a user desires if this is - * create path. - */ + spin_lock(&dentry->d_lock); + if (dentry->d_time != dentry->d_parent->d_inode->i_version) ret = 0; - else { - spin_lock(&dentry->d_lock); - if (dentry->d_time != dentry->d_parent->d_inode->i_version) - ret = 0; - spin_unlock(&dentry->d_lock); - } + spin_unlock(&dentry->d_lock); return ret; } +static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + /* This is not negative dentry. Always valid. */ + if (dentry->d_inode) + return 1; + return vfat_revalidate_shortname(dentry); +} + +static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) +{ + /* + * This is not negative dentry. Always valid. + * + * Note, rename() to existing directory entry will have ->d_inode, + * and will use existing name which isn't specified name by user. + * + * We may be able to drop this positive dentry here. But dropping + * positive dentry isn't good idea. So it's unsupported like + * rename("filename", "FILENAME") for now. + */ + if (dentry->d_inode) + return 1; + + /* + * This may be nfsd (or something), anyway, we can't see the + * intent of this. So, since this can be for creation, drop it. + */ + if (!nd) + return 0; + + /* + * Drop the negative dentry, in order to make sure to use the + * case sensitive name which is specified by user if this is + * for creation. + */ + if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { + if (nd->flags & LOOKUP_CREATE) + return 0; + } + + return vfat_revalidate_shortname(dentry); +} + /* returns the length of a struct qstr, ignoring trailing dots */ static unsigned int vfat_striptail_len(struct qstr *qstr) { @@ -126,25 +166,16 @@ static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) return 1; } -static struct dentry_operations vfat_dentry_ops[4] = { - { - .d_hash = vfat_hashi, - .d_compare = vfat_cmpi, - }, - { - .d_revalidate = vfat_revalidate, - .d_hash = vfat_hashi, - .d_compare = vfat_cmpi, - }, - { - .d_hash = vfat_hash, - .d_compare = vfat_cmp, - }, - { - .d_revalidate = vfat_revalidate, - .d_hash = vfat_hash, - .d_compare = vfat_cmp, - } +static struct dentry_operations vfat_ci_dentry_ops = { + .d_revalidate = vfat_revalidate_ci, + .d_hash = vfat_hashi, + .d_compare = vfat_cmpi, +}; + +static struct dentry_operations vfat_dentry_ops = { + .d_revalidate = vfat_revalidate, + .d_hash = vfat_hash, + .d_compare = vfat_cmp, }; /* Characters that are undesirable in an MS-DOS file name */ @@ -685,29 +716,35 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, struct fat_slot_info sinfo; struct inode *inode; struct dentry *alias; - int err, table; + int err; lock_super(sb); - table = (MSDOS_SB(sb)->options.name_check == 's') ? 2 : 0; - dentry->d_op = &vfat_dentry_ops[table]; err = vfat_find(dir, &dentry->d_name, &sinfo); if (err) { if (err == -ENOENT) { - table++; inode = NULL; goto out; } goto error; } + inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto error; } + alias = d_find_alias(inode); - if (alias) { + if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) { + /* + * This inode has non DCACHE_DISCONNECTED dentry. This + * means, the user did ->lookup() by an another name + * (longname vs 8.3 alias of it) in past. + * + * Switch to new one for reason of locality if possible. + */ if (d_invalidate(alias) == 0) dput(alias); else { @@ -715,15 +752,14 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, unlock_super(sb); return alias; } - } out: unlock_super(sb); - dentry->d_op = &vfat_dentry_ops[table]; + dentry->d_op = sb->s_root->d_op; dentry->d_time = dentry->d_parent->d_inode->i_version; dentry = d_splice_alias(inode, dentry); if (dentry) { - dentry->d_op = &vfat_dentry_ops[table]; + dentry->d_op = sb->s_root->d_op; dentry->d_time = dentry->d_parent->d_inode->i_version; } return dentry; @@ -1022,9 +1058,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent) return res; if (MSDOS_SB(sb)->options.name_check != 's') - sb->s_root->d_op = &vfat_dentry_ops[0]; + sb->s_root->d_op = &vfat_ci_dentry_ops; else - sb->s_root->d_op = &vfat_dentry_ops[2]; + sb->s_root->d_op = &vfat_dentry_ops; return 0; } From 1c13a243a461dd5b089d29e5d57f260c990e462c Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:52 -0800 Subject: [PATCH 160/173] fat: Kill d_invalidate() in vfat_lookup() d_invalidate() for positive dentry doesn't work in some cases (vfsmount, nfsd, and maybe others). shrink_dcache_parent() by d_invalidate() is pointless for vfat usage at all. So, this kills it, and intead of it uses d_move(). To save old behavior, this returns alias simply for directory (don't change pwd, etc..). the directory lookup shouldn't be important for performance. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/namei_vfat.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index d585398f9f6b..bf326d4356a3 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -745,13 +745,12 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, * * Switch to new one for reason of locality if possible. */ - if (d_invalidate(alias) == 0) - dput(alias); - else { - iput(inode); - unlock_super(sb); - return alias; - } + BUG_ON(d_unhashed(alias)); + if (!S_ISDIR(inode->i_mode)) + d_move(alias, dentry); + iput(inode); + unlock_super(sb); + return alias; } out: unlock_super(sb); From 45cfbe354785a5bc9a38354754d6f7322f598001 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:53 -0800 Subject: [PATCH 161/173] fat: Cleanup msdos_lookup() Use same style with vfat_lookup(). Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/namei_msdos.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index e92e8158ebaf..7ba03a4acbe0 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -203,33 +203,37 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, { struct super_block *sb = dir->i_sb; struct fat_slot_info sinfo; - struct inode *inode = NULL; - int res; - - dentry->d_op = &msdos_dentry_operations; + struct inode *inode; + int err; lock_super(sb); - res = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); - if (res == -ENOENT) - goto add; - if (res < 0) - goto out; + + err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); + if (err) { + if (err == -ENOENT) { + inode = NULL; + goto out; + } + goto error; + } + inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); if (IS_ERR(inode)) { - res = PTR_ERR(inode); - goto out; + err = PTR_ERR(inode); + goto error; } -add: - res = 0; +out: + unlock_super(sb); + dentry->d_op = &msdos_dentry_operations; dentry = d_splice_alias(inode, dentry); if (dentry) dentry->d_op = &msdos_dentry_operations; -out: + return dentry; + +error: unlock_super(sb); - if (!res) - return dentry; - return ERR_PTR(res); + return ERR_PTR(err); } /***** Creates a directory entry (name is already formatted). */ From 9c0aa1b87bf541affef519eb4879ce7c5a5941ae Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:54 -0800 Subject: [PATCH 162/173] fat: Cleanup FAT attribute stuff This adds three helpers: fat_make_attrs() - makes FAT attributes from inode. fat_make_mode() - makes mode_t from FAT attributes. fat_save_attrs() - saves FAT attributes to inode. Then this replaces: MSDOS_MKMODE() by fat_make_mode(), fat_attr() by fat_make_attrs(), ->i_attrs = attr & ATTR_UNUSED by fat_save_attrs(). And for root inode, those is used with ATTR_DIR instead of bogus ATTR_NONE. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fat.h | 20 +++++++++++++++++++- fs/fat/file.c | 32 ++++++++++++-------------------- fs/fat/inode.c | 19 +++++++++---------- include/linux/msdos_fs.h | 5 ----- 4 files changed, 40 insertions(+), 36 deletions(-) diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 2b8e94c3eef4..3b4753a024e3 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -117,14 +117,32 @@ static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) return container_of(inode, struct msdos_inode_info, vfs_inode); } +/* Convert attribute bits and a mask to the UNIX mode. */ +static inline mode_t fat_make_mode(struct msdos_sb_info *sbi, + u8 attrs, mode_t mode) +{ + if (attrs & ATTR_RO) + mode &= ~S_IWUGO; + + if (attrs & ATTR_DIR) + return (mode & ~sbi->options.fs_dmask) | S_IFDIR; + else + return (mode & ~sbi->options.fs_fmask) | S_IFREG; +} + /* Return the FAT attribute byte for this inode */ -static inline u8 fat_attr(struct inode *inode) +static inline u8 fat_make_attrs(struct inode *inode) { return ((inode->i_mode & S_IWUGO) ? ATTR_NONE : ATTR_RO) | (S_ISDIR(inode->i_mode) ? ATTR_DIR : ATTR_NONE) | MSDOS_I(inode)->i_attrs; } +static inline void fat_save_attrs(struct inode *inode, u8 attrs) +{ + MSDOS_I(inode)->i_attrs = attrs & ATTR_UNUSED; +} + static inline unsigned char fat_checksum(const __u8 *name) { unsigned char s = name[0]; diff --git a/fs/fat/file.c b/fs/fat/file.c index b21973f266a1..f5a7e907a8fa 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -27,13 +27,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, switch (cmd) { case FAT_IOCTL_GET_ATTRIBUTES: { - u32 attr; - - if (inode->i_ino == MSDOS_ROOT_INO) - attr = ATTR_DIR; - else - attr = fat_attr(inode); - + u32 attr = fat_make_attrs(inode); return put_user(attr, user_attr); } case FAT_IOCTL_SET_ATTRIBUTES: @@ -62,20 +56,16 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, /* Merge in ATTR_VOLUME and ATTR_DIR */ attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | (is_dir ? ATTR_DIR : 0); - oldattr = fat_attr(inode); + oldattr = fat_make_attrs(inode); /* Equivalent to a chmod() */ ia.ia_valid = ATTR_MODE | ATTR_CTIME; ia.ia_ctime = current_fs_time(inode->i_sb); - if (is_dir) { - ia.ia_mode = MSDOS_MKMODE(attr, - S_IRWXUGO & ~sbi->options.fs_dmask) - | S_IFDIR; - } else { - ia.ia_mode = MSDOS_MKMODE(attr, - (S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)) - & ~sbi->options.fs_fmask) - | S_IFREG; + if (is_dir) + ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO); + else { + ia.ia_mode = fat_make_mode(sbi, attr, + S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)); } /* The root directory has no attributes */ @@ -115,7 +105,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, inode->i_flags &= S_IMMUTABLE; } - MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; + fat_save_attrs(inode, attr); mark_inode_dirty(inode); up: mnt_drop_write(filp->f_path.mnt); @@ -274,7 +264,7 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi, /* * Note, the basic check is already done by a caller of - * (attr->ia_mode & ~MSDOS_VALID_MODE) + * (attr->ia_mode & ~FAT_VALID_MODE) */ if (S_ISREG(inode->i_mode)) @@ -314,6 +304,8 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) } #define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) +/* valid file mode bits */ +#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO) int fat_setattr(struct dentry *dentry, struct iattr *attr) { @@ -356,7 +348,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) ((attr->ia_valid & ATTR_GID) && (attr->ia_gid != sbi->options.fs_gid)) || ((attr->ia_valid & ATTR_MODE) && - (attr->ia_mode & ~MSDOS_VALID_MODE))) + (attr->ia_mode & ~FAT_VALID_MODE))) error = -EPERM; if (error) { diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8e1b75c63c7f..7aaa21cf019a 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -337,8 +337,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) { inode->i_generation &= ~1; - inode->i_mode = MSDOS_MKMODE(de->attr, - S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR; + inode->i_mode = fat_make_mode(sbi, de->attr, S_IRWXUGO); inode->i_op = sbi->dir_ops; inode->i_fop = &fat_dir_operations; @@ -355,10 +354,9 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_nlink = fat_subdirs(inode); } else { /* not a directory */ inode->i_generation |= 1; - inode->i_mode = MSDOS_MKMODE(de->attr, - ((sbi->options.showexec && !is_exec(de->name + 8)) - ? S_IRUGO|S_IWUGO : S_IRWXUGO) - & ~sbi->options.fs_fmask) | S_IFREG; + inode->i_mode = fat_make_mode(sbi, de->attr, + ((sbi->options.showexec && !is_exec(de->name + 8)) + ? S_IRUGO|S_IWUGO : S_IRWXUGO)); MSDOS_I(inode)->i_start = le16_to_cpu(de->start); if (sbi->fat_bits == 32) MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16); @@ -374,7 +372,8 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) if (sbi->options.sys_immutable) inode->i_flags |= S_IMMUTABLE; } - MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; + fat_save_attrs(inode, de->attr); + inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; @@ -569,7 +568,7 @@ static int fat_write_inode(struct inode *inode, int wait) raw_entry->size = 0; else raw_entry->size = cpu_to_le32(inode->i_size); - raw_entry->attr = fat_attr(inode); + raw_entry->attr = fat_make_attrs(inode); raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time, @@ -1105,7 +1104,7 @@ static int fat_read_root(struct inode *inode) inode->i_gid = sbi->options.fs_gid; inode->i_version++; inode->i_generation = 0; - inode->i_mode = (S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR; + inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO); inode->i_op = sbi->dir_ops; inode->i_fop = &fat_dir_operations; if (sbi->fat_bits == 32) { @@ -1122,7 +1121,7 @@ static int fat_read_root(struct inode *inode) MSDOS_I(inode)->i_logstart = 0; MSDOS_I(inode)->mmu_private = inode->i_size; - MSDOS_I(inode)->i_attrs = ATTR_NONE; + fat_save_attrs(inode, ATTR_DIR); inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0; inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0; inode->i_nlink = fat_subdirs(inode)+2; diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 0982fb47a90d..e0a9b207920d 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -46,11 +46,6 @@ #define DELETED_FLAG 0xe5 /* marks file as deleted when in name[0] */ #define IS_FREE(n) (!*(n) || *(n) == DELETED_FLAG) -/* valid file mode bits */ -#define MSDOS_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO) -/* Convert attribute bits and a mask to the UNIX mode. */ -#define MSDOS_MKMODE(a, m) (m & (a & ATTR_RO ? S_IRUGO|S_IXUGO : S_IRWXUGO)) - #define MSDOS_NAME 11 /* maximum name length */ #define MSDOS_LONGNAME 256 /* maximum name length */ #define MSDOS_SLOTS 21 /* max # of slots for short and long names */ From 9183482f5d4a2de00f66641b974e7f351d41b675 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:54 -0800 Subject: [PATCH 163/173] fat: Fix ATTR_RO in the case of (~umask & S_WUGO) == 0 If inode->i_mode doesn't have S_WUGO, current code assumes it means ATTR_RO. However, if (~[ufd]mask & S_WUGO) == 0, inode->i_mode can't hold S_WUGO. Therefore the updated directory entry will always have ATTR_RO. This adds fat_mode_can_hold_ro() to check it. And if inode->i_mode can't hold, uses -i_attrs to hold ATTR_RO instead. With this, we don't set ATTR_RO unless users change it via ioctl() if (~[ufd]mask & S_WUGO) == 0. And on FAT_IOCTL_GET_ATTRIBUTES path, this adds ->i_mutex to it for not returning the partially updated attributes by FAT_IOCTL_SET_ATTRIBUTES to userland. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fat.h | 33 +++++++++++++++++++++++++++++---- fs/fat/file.c | 7 ++++++- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 3b4753a024e3..313b645b8126 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -117,6 +117,25 @@ static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) return container_of(inode, struct msdos_inode_info, vfs_inode); } +/* + * If ->i_mode can't hold S_IWUGO (i.e. ATTR_RO), we use ->i_attrs to + * save ATTR_RO instead of ->i_mode. + */ +static inline int fat_mode_can_hold_ro(struct inode *inode) +{ + struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); + mode_t mask; + + if (S_ISDIR(inode->i_mode)) + mask = ~sbi->options.fs_dmask; + else + mask = ~sbi->options.fs_fmask; + + if (!(mask & S_IWUGO)) + return 0; + return 1; +} + /* Convert attribute bits and a mask to the UNIX mode. */ static inline mode_t fat_make_mode(struct msdos_sb_info *sbi, u8 attrs, mode_t mode) @@ -133,14 +152,20 @@ static inline mode_t fat_make_mode(struct msdos_sb_info *sbi, /* Return the FAT attribute byte for this inode */ static inline u8 fat_make_attrs(struct inode *inode) { - return ((inode->i_mode & S_IWUGO) ? ATTR_NONE : ATTR_RO) | - (S_ISDIR(inode->i_mode) ? ATTR_DIR : ATTR_NONE) | - MSDOS_I(inode)->i_attrs; + u8 attrs = MSDOS_I(inode)->i_attrs; + if (S_ISDIR(inode->i_mode)) + attrs |= ATTR_DIR; + if (fat_mode_can_hold_ro(inode) && !(inode->i_mode & S_IWUGO)) + attrs |= ATTR_RO; + return attrs; } static inline void fat_save_attrs(struct inode *inode, u8 attrs) { - MSDOS_I(inode)->i_attrs = attrs & ATTR_UNUSED; + if (fat_mode_can_hold_ro(inode)) + MSDOS_I(inode)->i_attrs = attrs & ATTR_UNUSED; + else + MSDOS_I(inode)->i_attrs = attrs & (ATTR_UNUSED | ATTR_RO); } static inline unsigned char fat_checksum(const __u8 *name) diff --git a/fs/fat/file.c b/fs/fat/file.c index f5a7e907a8fa..81b15c623803 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -27,7 +27,12 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, switch (cmd) { case FAT_IOCTL_GET_ATTRIBUTES: { - u32 attr = fat_make_attrs(inode); + u32 attr; + + mutex_lock(&inode->i_mutex); + attr = fat_make_attrs(inode); + mutex_unlock(&inode->i_mutex); + return put_user(attr, user_attr); } case FAT_IOCTL_SET_ATTRIBUTES: From dfc209c0064efef5590f608056a48b61a5cac09c Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:55 -0800 Subject: [PATCH 164/173] fat: Fix ATTR_RO for directory FAT has the ATTR_RO (read-only) attribute. But on Windows, the ATTR_RO of the directory will be just ignored actually, and is used by only applications as flag. E.g. it's setted for the customized folder by Explorer. http://msdn2.microsoft.com/en-us/library/aa969337.aspx This adds "rodir" option. If user specified it, ATTR_RO is used as read-only flag even if it's the directory. Otherwise, inode->i_mode is not used to hold ATTR_RO (i.e. fat_mode_can_save_ro() returns 0). Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/vfat.txt | 8 ++++++++ fs/fat/fat.h | 14 ++++++++++---- fs/fat/file.c | 16 ++++++++++++---- fs/fat/inode.c | 17 +++++++++++++---- 4 files changed, 43 insertions(+), 12 deletions(-) diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index dc9dc73d7d38..3a5ddc96901a 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt @@ -124,6 +124,14 @@ sys_immutable -- If set, ATTR_SYS attribute on FAT is handled as flush -- If set, the filesystem will try to flush to disk more early than normal. Not set by default. +rodir -- FAT has the ATTR_RO (read-only) attribute. But on Windows, + the ATTR_RO of the directory will be just ignored actually, + and is used by only applications as flag. E.g. it's setted + for the customized folder. + + If you want to use ATTR_RO as read-only flag even for + the directory, set this option. + : 0,1,yes,no,true,false TODO diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 313b645b8126..e9dce5d8e7a7 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -38,7 +38,8 @@ struct fat_mount_options { flush:1, /* write things quickly */ nocase:1, /* Does this need case conversion? 0=need case conversion*/ usefree:1, /* Use free_clusters for FAT32 */ - tz_utc:1; /* Filesystem timestamps are in UTC */ + tz_utc:1, /* Filesystem timestamps are in UTC */ + rodir:1; /* allow ATTR_RO for directory */ }; #define FAT_HASH_BITS 8 @@ -120,15 +121,20 @@ static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) /* * If ->i_mode can't hold S_IWUGO (i.e. ATTR_RO), we use ->i_attrs to * save ATTR_RO instead of ->i_mode. + * + * If it's directory and !sbi->options.rodir, ATTR_RO isn't read-only + * bit, it's just used as flag for app. */ static inline int fat_mode_can_hold_ro(struct inode *inode) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); mode_t mask; - if (S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) { + if (!sbi->options.rodir) + return 0; mask = ~sbi->options.fs_dmask; - else + } else mask = ~sbi->options.fs_fmask; if (!(mask & S_IWUGO)) @@ -140,7 +146,7 @@ static inline int fat_mode_can_hold_ro(struct inode *inode) static inline mode_t fat_make_mode(struct msdos_sb_info *sbi, u8 attrs, mode_t mode) { - if (attrs & ATTR_RO) + if (attrs & ATTR_RO && !((attrs & ATTR_DIR) && !sbi->options.rodir)) mode &= ~S_IWUGO; if (attrs & ATTR_DIR) diff --git a/fs/fat/file.c b/fs/fat/file.c index 81b15c623803..f06a4e525ece 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -282,11 +282,18 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi, /* * Of the r and x bits, all (subject to umask) must be present. Of the * w bits, either all (subject to umask) or none must be present. + * + * If fat_mode_can_hold_ro(inode) is false, can't change w bits. */ if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO))) return -EPERM; - if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask))) - return -EPERM; + if (fat_mode_can_hold_ro(inode)) { + if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask))) + return -EPERM; + } else { + if ((perm & S_IWUGO) != (S_IWUGO & ~mask)) + return -EPERM; + } *mode_ptr &= S_IFMT | perm; @@ -316,8 +323,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) { struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); struct inode *inode = dentry->d_inode; - int error = 0; unsigned int ia_valid; + int error; /* * Expand the file. Since inode_setattr() updates ->i_size @@ -371,7 +378,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_valid &= ~ATTR_MODE; } - error = inode_setattr(inode, attr); + if (attr->ia_valid) + error = inode_setattr(inode, attr); out: return error; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 7aaa21cf019a..0da04e6d1e34 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -797,8 +797,10 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt) seq_puts(m, ",uni_xlate"); if (!opts->numtail) seq_puts(m, ",nonumtail"); + if (opts->rodir) + seq_puts(m, ",rodir"); } - if (sbi->options.flush) + if (opts->flush) seq_puts(m, ",flush"); if (opts->tz_utc) seq_puts(m, ",tz=UTC"); @@ -814,7 +816,7 @@ enum { Opt_charset, Opt_shortname_lower, Opt_shortname_win95, Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, - Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, + Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err, }; static const match_table_t fat_tokens = { @@ -886,6 +888,7 @@ static const match_table_t vfat_tokens = { {Opt_nonumtail_yes, "nonumtail=yes"}, {Opt_nonumtail_yes, "nonumtail=true"}, {Opt_nonumtail_yes, "nonumtail"}, + {Opt_rodir, "rodir"}, {Opt_err, NULL} }; @@ -905,10 +908,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, opts->allow_utime = -1; opts->codepage = fat_default_codepage; opts->iocharset = fat_default_iocharset; - if (is_vfat) + if (is_vfat) { opts->shortname = VFAT_SFN_DISPLAY_LOWER|VFAT_SFN_CREATE_WIN95; - else + opts->rodir = 0; + } else { opts->shortname = 0; + opts->rodir = 1; + } opts->name_check = 'n'; opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0; opts->utf8 = opts->unicode_xlate = 0; @@ -1059,6 +1065,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, case Opt_nonumtail_yes: /* empty or 1 or yes or true */ opts->numtail = 0; /* negated option */ break; + case Opt_rodir: + opts->rodir = 1; + break; /* obsolete mount options */ case Opt_obsolate: From fa93ca18a8b0da4e26bd9491ad144cd14d22f8ec Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:56 -0800 Subject: [PATCH 165/173] fat: Fix _fat_bmap() race fat_get_cluster() assumes the requested blocknr isn't truncated during read. _fat_bmap() doesn't follow this rule. This protects it by ->i_mutex. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 0da04e6d1e34..be88208b83a6 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -199,7 +199,14 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, static sector_t _fat_bmap(struct address_space *mapping, sector_t block) { - return generic_block_bmap(mapping, block, fat_get_block); + sector_t blocknr; + + /* fat_get_cluster() assumes the requested blocknr isn't truncated. */ + mutex_lock(&mapping->host->i_mutex); + blocknr = generic_block_bmap(mapping, block, fat_get_block); + mutex_unlock(&mapping->host->i_mutex); + + return blocknr; } static const struct address_space_operations fat_aops = { From 0e75f5da06c05425f4b375eb981c4489fb2d9787 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:56 -0800 Subject: [PATCH 166/173] fat: Add printf attribute to fat_fs_panic() Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fat.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/fat/fat.h b/fs/fat/fat.h index e9dce5d8e7a7..a69f7f9757c0 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -308,7 +308,8 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent, extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2); /* fat/misc.c */ -extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); +extern void fat_fs_panic(struct super_block *s, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))) __cold; extern void fat_clusters_flush(struct super_block *sb); extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, From 2bdf67eb1631f30e2f3f5d49e4007c76e88877a8 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:57 -0800 Subject: [PATCH 167/173] fat: mmu_private race fix mmu_private is 64bits value, hence it's not atomic to update. So, the access rule for mmu_private is we must hold ->i_mutex. But, fat_get_block() path doesn't follow the rule on non-allocation path. This fixes by using i_size instead if non-allocation path. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/cache.c | 23 ++++++++++++++++++----- fs/fat/dir.c | 2 +- fs/fat/fat.h | 6 ++++-- fs/fat/inode.c | 4 ++-- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 589edde9053c..b42602298087 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -293,10 +293,12 @@ static int fat_bmap_cluster(struct inode *inode, int cluster) } int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, - unsigned long *mapped_blocks) + unsigned long *mapped_blocks, int create) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); + const unsigned long blocksize = sb->s_blocksize; + const unsigned char blocksize_bits = sb->s_blocksize_bits; sector_t last_block; int cluster, offset; @@ -309,10 +311,21 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, } return 0; } - last_block = (MSDOS_I(inode)->mmu_private + (sb->s_blocksize - 1)) - >> sb->s_blocksize_bits; - if (sector >= last_block) - return 0; + + last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits; + if (sector >= last_block) { + if (!create) + return 0; + + /* + * ->mmu_private can access on only allocation path. + * (caller must hold ->i_mutex) + */ + last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1)) + >> blocksize_bits; + if (sector >= last_block) + return 0; + } cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits); offset = sector & (sbi->sec_per_clus - 1); diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 140fc39e2307..2ecaa17acdb5 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -77,7 +77,7 @@ static int fat__get_entry(struct inode *dir, loff_t *pos, *bh = NULL; iblock = *pos >> sb->s_blocksize_bits; - err = fat_bmap(dir, iblock, &phys, &mapped_blocks); + err = fat_bmap(dir, iblock, &phys, &mapped_blocks, 0); if (err || !phys) return -1; /* beyond EOF or error */ diff --git a/fs/fat/fat.h b/fs/fat/fat.h index a69f7f9757c0..4efc5038ed29 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -91,7 +91,9 @@ struct msdos_inode_info { /* for avoiding the race between fat_free() and fat_get_cluster() */ unsigned int cache_valid_id; - loff_t mmu_private; + /* NOTE: mmu_private is 64bits, so must hold ->i_mutex to access */ + loff_t mmu_private; /* physically allocated size */ + int i_start; /* first cluster or 0 */ int i_logstart; /* logical first cluster */ int i_attrs; /* unused attribute bits */ @@ -222,7 +224,7 @@ extern void fat_cache_inval_inode(struct inode *inode); extern int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus); extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, - unsigned long *mapped_blocks); + unsigned long *mapped_blocks, int create); /* fat/dir.c */ extern const struct file_operations fat_dir_operations; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index be88208b83a6..9e37ad93c730 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -64,7 +64,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock, sector_t phys; int err, offset; - err = fat_bmap(inode, iblock, &phys, &mapped_blocks); + err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create); if (err) return err; if (phys) { @@ -94,7 +94,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock, *max_blocks = min(mapped_blocks, *max_blocks); MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits; - err = fat_bmap(inode, iblock, &phys, &mapped_blocks); + err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create); if (err) return err; From 9ca59f4c3d28df14a1545a1e2832f34a0a50e3ed Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:57 -0800 Subject: [PATCH 168/173] fat: ->i_pos race fix i_pos is 64bits value, hence it's not atomic to update. Important place is fat_write_inode() only, other places without lock are just for printk(). This adds lock for "BITS_PER_LONG == 32" kernel. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 9e37ad93c730..bdd8fb7be2ca 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -542,6 +542,20 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } +static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi, + struct inode *inode) +{ + loff_t i_pos; +#if BITS_PER_LONG == 32 + spin_lock(&sbi->inode_hash_lock); +#endif + i_pos = MSDOS_I(inode)->i_pos; +#if BITS_PER_LONG == 32 + spin_unlock(&sbi->inode_hash_lock); +#endif + return i_pos; +} + static int fat_write_inode(struct inode *inode, int wait) { struct super_block *sb = inode->i_sb; @@ -551,9 +565,12 @@ static int fat_write_inode(struct inode *inode, int wait) loff_t i_pos; int err; + if (inode->i_ino == MSDOS_ROOT_INO) + return 0; + retry: - i_pos = MSDOS_I(inode)->i_pos; - if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) + i_pos = fat_i_pos_read(sbi, inode); + if (!i_pos) return 0; bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); From c3302931db090d87e9015c3a7ce5c97a7dd90f78 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:58 -0800 Subject: [PATCH 169/173] fat: i_blocks warning fix blkcnt_t type depends on CONFIG_LSF. Use unsigned long long always for printk(). But lazy to type it, so add "llu" and use it. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 2 +- fs/fat/fat.h | 3 +++ fs/fat/fatent.c | 5 ++--- fs/fat/misc.c | 5 +++-- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 2ecaa17acdb5..67e058357098 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -86,7 +86,7 @@ static int fat__get_entry(struct inode *dir, loff_t *pos, *bh = sb_bread(sb, phys); if (*bh == NULL) { printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n", - (unsigned long long)phys); + (llu)phys); /* skip this block */ *pos = (iblock + 1) << sb->s_blocksize_bits; goto next; diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 4efc5038ed29..ea440d65819c 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -323,4 +323,7 @@ extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); int fat_cache_init(void); void fat_cache_destroy(void); +/* helper for printk */ +typedef unsigned long long llu; + #endif /* !_FAT_H */ diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 13513992da3c..da6eea47872f 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -93,8 +93,7 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent, err_brelse: brelse(bhs[0]); err: - printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", - (unsigned long long)blocknr); + printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", (llu)blocknr); return -EIO; } @@ -107,7 +106,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent, fatent->bhs[0] = sb_bread(sb, blocknr); if (!fatent->bhs[0]) { printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", - (unsigned long long)blocknr); + (llu)blocknr); return -EIO; } fatent->nr_bhs = 1; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index a191e79e66a9..ac39ebcc1496 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -124,8 +124,9 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) mark_inode_dirty(inode); } if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) { - fat_fs_panic(sb, "clusters badly computed (%d != %lu)", - new_fclus, inode->i_blocks >> (sbi->cluster_bits - 9)); + fat_fs_panic(sb, "clusters badly computed (%d != %llu)", + new_fclus, + (llu)(inode->i_blocks >> (sbi->cluster_bits - 9))); fat_cache_inval_inode(inode); } inode->i_blocks += nr_cluster << (sbi->cluster_bits - 9); From 7597bc94d6f3bdccb086ac7f2ad91292fdaee2a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Nov 2008 17:38:47 +0000 Subject: [PATCH 170/173] Fix accidental implicit cast in HR-timer conversion Fix the hrtimer_add_expires_ns() function. It should take a 'u64 ns' argument, but rather takes an 'unsigned long ns' argument - which might only be 32-bits. On FRV, this results in the kernel locking up because hrtimer_forward() passes the result of a 64-bit multiplication to this function, for which the compiler discards the top 32-bits - something that didn't happen when ktime_add_ns() was called directly. Signed-off-by: David Howells Acked-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2b3645b1acf4..07e510a3b00a 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -239,7 +239,7 @@ static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) timer->_softexpires = ktime_add_safe(timer->_softexpires, time); } -static inline void hrtimer_add_expires_ns(struct hrtimer *timer, unsigned long ns) +static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns) { timer->_expires = ktime_add_ns(timer->_expires, ns); timer->_softexpires = ktime_add_ns(timer->_softexpires, ns); From 3b53fbf4314594fa04544b02b2fc6e607912da18 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 6 Nov 2008 15:45:32 -0800 Subject: [PATCH 171/173] net: Fix recursive descent in __scm_destroy(). __scm_destroy() walks the list of file descriptors in the scm_fp_list pointed to by the scm_cookie argument. Those, in turn, can close sockets and invoke __scm_destroy() again. There is nothing which limits how deeply this can occur. The idea for how to fix this is from Linus. Basically, we do all of the fput()s at the top level by collecting all of the scm_fp_list objects hit by an fput(). Inside of the initial __scm_destroy() we keep running the list until it is empty. Signed-off-by: David S. Miller --- include/linux/sched.h | 2 ++ include/net/scm.h | 5 +++-- net/core/scm.c | 24 +++++++++++++++++++++--- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..295b7c756ca6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1349,6 +1349,8 @@ struct task_struct { */ unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; + + struct list_head *scm_work_list; }; /* diff --git a/include/net/scm.h b/include/net/scm.h index 06df126103ca..33e9986beb86 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -14,8 +14,9 @@ struct scm_fp_list { - int count; - struct file *fp[SCM_MAX_FD]; + struct list_head list; + int count; + struct file *fp[SCM_MAX_FD]; }; struct scm_cookie diff --git a/net/core/scm.c b/net/core/scm.c index 10f5c65f6a47..ab242cc1acca 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -75,6 +75,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (!fpl) return -ENOMEM; *fplp = fpl; + INIT_LIST_HEAD(&fpl->list); fpl->count = 0; } fpp = &fpl->fp[fpl->count]; @@ -106,9 +107,25 @@ void __scm_destroy(struct scm_cookie *scm) if (fpl) { scm->fp = NULL; - for (i=fpl->count-1; i>=0; i--) - fput(fpl->fp[i]); - kfree(fpl); + if (current->scm_work_list) { + list_add_tail(&fpl->list, current->scm_work_list); + } else { + LIST_HEAD(work_list); + + current->scm_work_list = &work_list; + + list_add(&fpl->list, &work_list); + while (!list_empty(&work_list)) { + fpl = list_first_entry(&work_list, struct scm_fp_list, list); + + list_del(&fpl->list); + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); + } + + current->scm_work_list = NULL; + } } } @@ -284,6 +301,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); if (new_fpl) { + INIT_LIST_HEAD(&new_fpl->list); for (i=fpl->count-1; i>=0; i--) get_file(fpl->fp[i]); memcpy(new_fpl, fpl, sizeof(*fpl)); From f29c9b1ccb52904ee442a933cf3dee628f9f4e62 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 6 Nov 2008 09:45:16 +0800 Subject: [PATCH 172/173] sched: fix a bug in sched domain degenerate Impact: re-add incorrectly eliminated sched domain layers (1) on i386 with SCHED_SMT and SCHED_MC enabled # mount -t cgroup -o cpuset xxx /mnt # echo 0 > /mnt/cpuset.sched_load_balance # mkdir /mnt/0 # echo 0 > /mnt/0/cpuset.cpus # dmesg CPU0 attaching sched-domain: domain 0: span 0 level CPU groups: 0 (2) on i386 with SCHED_MC enabled but SCHED_SMT disabled # same with (1) # dmesg CPU0 attaching NULL sched-domain. The bug is that some sched domains may be skipped unintentionally when degenerating (optimizing) sched domains. Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 82cc839c9210..4c7e2bcdfa89 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6877,15 +6877,17 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) struct sched_domain *tmp; /* Remove the sched domains which do not contribute to scheduling. */ - for (tmp = sd; tmp; tmp = tmp->parent) { + for (tmp = sd; tmp; ) { struct sched_domain *parent = tmp->parent; if (!parent) break; + if (sd_parent_degenerate(tmp, parent)) { tmp->parent = parent->parent; if (parent->parent) parent->parent->child = tmp; - } + } else + tmp = tmp->parent; } if (sd && sd_degenerate(sd)) { From ca3273f9646694e0419cfb9d6c12deb1c9aff27c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 7 Nov 2008 14:47:21 +0800 Subject: [PATCH 173/173] sched: fix memory leak in a failure path Impact: fix rare memory leak in the sched-domains manual reconfiguration code In the failure path, rd is not attached to a sched domain, so it causes a leak. Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched.c b/kernel/sched.c index 4c7e2bcdfa89..57c933ffbee1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7676,6 +7676,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, error: free_sched_groups(cpu_map, tmpmask); SCHED_CPUMASK_FREE((void *)allmasks); + kfree(rd); return -ENOMEM; #endif }