mirror of https://github.com/rd-stuffs/msm-4.14.git
synced 2025-02-20 11:45:48 +08:00
* refs/heads/tmp-c680586:
  dm: Restore reverted changes
  Linux 4.14.114
  kernel/sysctl.c: fix out-of-bounds access when setting file-max
  Revert "locking/lockdep: Add debug_locks check in __lock_downgrade()"
  i2c-hid: properly terminate i2c_hid_dmi_desc_override_table[] array
  xfs: hold xfs_buf locked between shortform->leaf conversion and the addition of an attribute
  xfs: add the ability to join a held buffer to a defer_ops
  iomap: report collisions between directio and buffered writes to userspace
  tools include: Adopt linux/bits.h
  percpu: stop printing kernel addresses
  ALSA: info: Fix racy addition/deletion of nodes
  mm/vmstat.c: fix /proc/vmstat format for CONFIG_DEBUG_TLBFLUSH=y CONFIG_SMP=n
  device_cgroup: fix RCU imbalance in error case
  sched/fair: Limit sched_cfs_period_timer() loop to avoid hard lockup
  Revert "kbuild: use -Oz instead of -Os when using clang"
  net: IP6 defrag: use rbtrees in nf_conntrack_reasm.c
  net: IP6 defrag: use rbtrees for IPv6 defrag
  ipv6: remove dependency of nf_defrag_ipv6 on ipv6 module
  net: IP defrag: encapsulate rbtree defrag code into callable functions
  ipv6: frags: fix a lockdep false positive
  tpm/tpm_i2c_atmel: Return -E2BIG when the transfer is incomplete
  modpost: file2alias: check prototype of handler
  modpost: file2alias: go back to simple devtable lookup
  mmc: sdhci: Handle auto-command errors
  mmc: sdhci: Rename SDHCI_ACMD12_ERR and SDHCI_INT_ACMD12ERR
  mmc: sdhci: Fix data command CRC error handling
  crypto: crypto4xx - properly set IV after de- and encrypt
  x86/speculation: Prevent deadlock on ssb_state::lock
  perf/x86: Fix incorrect PEBS_REGS
  x86/cpu/bugs: Use __initconst for 'const' init data
  perf/x86/amd: Add event map for AMD Family 17h
  mac80211: do not call driver wake_tx_queue op during reconfig
  rt2x00: do not increment sequence number while re-transmitting
  kprobes: Fix error check when reusing optimized probes
  kprobes: Mark ftrace mcount handler functions nokprobe
  x86/kprobes: Verify stack frame on kretprobe
  arm64: futex: Restore oldval initialization to work around buggy compilers
  crypto: x86/poly1305 - fix overflow during partial reduction
  coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping
  Revert "svm: Fix AVIC incomplete IPI emulation"
  Revert "scsi: fcoe: clear FC_RP_STARTED flags when receiving a LOGO"
  scsi: core: set result when the command cannot be dispatched
  ALSA: core: Fix card races between register and disconnect
  ALSA: hda/realtek - add two more pin configuration sets to quirk table
  staging: comedi: ni_usb6501: Fix possible double-free of ->usb_rx_buf
  staging: comedi: ni_usb6501: Fix use of uninitialized mutex
  staging: comedi: vmk80xx: Fix possible double-free of ->usb_rx_buf
  staging: comedi: vmk80xx: Fix use of uninitialized semaphore
  iio: accel: kxcjk1013: restore the range after resume.
  iio: core: fix a possible circular locking dependency
  iio: adc: at91: disable adc channel interrupt in timeout case
  iio: Fix scan mask selection
  iio: dac: mcp4725: add missing powerdown bits in store eeprom
  iio: ad_sigma_delta: select channel when reading register
  iio: cros_ec: Fix the maths for gyro scale calculation
  iio/gyro/bmg160: Use millidegrees for temperature scale
  iio: gyro: mpu3050: fix chip ID reading
  staging: iio: ad7192: Fix ad7193 channel address
  Staging: iio: meter: fixed typo
  KVM: x86: svm: make sure NMI is injected after nmi_singlestep
  KVM: x86: Don't clear EFER during SMM transitions for 32-bit vCPU
  CIFS: keep FileInfo handle live during oplock break
  net: thunderx: don't allow jumbo frames with XDP
  net: thunderx: raise XDP MTU to 1508
  ipv4: ensure rcu_read_lock() in ipv4_link_failure()
  ipv4: recompile ip options in ipv4_link_failure
  vhost: reject zero size iova range
  team: set slave to promisc if team is already in promisc mode
  tcp: tcp_grow_window() needs to respect tcp_space()
  net: fou: do not use guehdr after iptunnel_pull_offloads in gue_udp_recv
  net: bridge: multicast: use rcu to access port list from br_multicast_start_querier
  net: bridge: fix per-port af_packet sockets
  net: atm: Fix potential Spectre v1 vulnerabilities
  bonding: fix event handling for stacked bonds
  ANDROID: cuttlefish_defconfig: Enable CONFIG_XFRM_STATISTICS
  Linux 4.14.113
  appletalk: Fix compile regression
  mm: hide incomplete nr_indirectly_reclaimable in sysfs
  net: stmmac: Set dma ring length before enabling the DMA
  bpf: Fix selftests are changes for CVE 2019-7308
  bpf: fix sanitation rewrite in case of non-pointers
  bpf: do not restore dst_reg when cur_state is freed
  bpf: fix inner map masking to prevent oob under speculation
  bpf: fix sanitation of alu op with pointer / scalar type from different paths
  bpf: prevent out of bounds speculation on pointer arithmetic
  bpf: fix check_map_access smin_value test when pointer contains offset
  bpf: restrict unknown scalars of mixed signed bounds for unprivileged
  bpf: restrict stack pointer arithmetic for unprivileged
  bpf: restrict map value pointer arithmetic for unprivileged
  bpf: enable access to ax register also from verifier rewrite
  bpf: move tmp variable into ax register in interpreter
  bpf: move {prev_,}insn_idx into verifier env
  bpf: fix stack state printing in verifier log
  bpf: fix verifier NULL pointer dereference
  bpf: fix verifier memory leaks
  bpf: reduce verifier memory consumption
  dm: disable CRYPTO_TFM_REQ_MAY_SLEEP to fix a GFP_KERNEL recursion deadlock
  bpf: fix use after free in bpf_evict_inode
  include/linux/swap.h: use offsetof() instead of custom __swapoffset macro
  lib/div64.c: off by one in shift
  appletalk: Fix use-after-free in atalk_proc_exit
  drm/amdkfd: use init_mqd function to allocate object for hid_mqd (CI)
  ARM: 8839/1: kprobe: make patch_lock a raw_spinlock_t
  drm/nouveau/volt/gf117: fix speedo readout register
  coresight: cpu-debug: Support for CA73 CPUs
  Revert "ACPI / EC: Remove old CLEAR_ON_RESUME quirk"
  crypto: axis - fix for recursive locking from bottom half
  drm/panel: panel-innolux: set display off in innolux_panel_unprepare
  lkdtm: Add tests for NULL pointer dereference
  lkdtm: Print real addresses
  soc/tegra: pmc: Drop locking from tegra_powergate_is_powered()
  iommu/dmar: Fix buffer overflow during PCI bus notification
  crypto: sha512/arm - fix crash bug in Thumb2 build
  crypto: sha256/arm - fix crash bug in Thumb2 build
  kernel: hung_task.c: disable on suspend
  cifs: fallback to older infolevels on findfirst queryinfo retry
  compiler.h: update definition of unreachable()
  KVM: nVMX: restore host state in nested_vmx_vmexit for VMFail
  ACPI / SBS: Fix GPE storm on recent MacBookPro's
  usbip: fix vhci_hcd controller counting
  ARM: samsung: Limit SAMSUNG_PM_CHECK config option to non-Exynos platforms
  HID: i2c-hid: override HID descriptors for certain devices
  media: au0828: cannot kfree dev before usb disconnect
  powerpc/pseries: Remove prrn_work workqueue
  serial: uartps: console_setup() can't be placed to init section
  netfilter: xt_cgroup: shrink size of v2 path
  f2fs: fix to do sanity check with current segment number
  9p locks: add mount option for lock retry interval
  9p: do not trust pdu content for stat item size
  rsi: improve kernel thread handling to fix kernel panic
  gpio: pxa: handle corner case of unprobed device
  ext4: prohibit fstrim in norecovery mode
  fix incorrect error code mapping for OBJECTID_NOT_FOUND
  x86/hw_breakpoints: Make default case in hw_breakpoint_arch_parse() return an error
  iommu/vt-d: Check capability before disabling protected memory
  drm/nouveau/debugfs: Fix check of pm_runtime_get_sync failure
  x86/cpu/cyrix: Use correct macros for Cyrix calls on Geode processors
  x86/hpet: Prevent potential NULL pointer dereference
  irqchip/mbigen: Don't clear eventid when freeing an MSI
  perf tests: Fix a memory leak in test__perf_evsel__tp_sched_test()
  perf tests: Fix memory leak by expr__find_other() in test__expr()
  perf tests: Fix a memory leak of cpu_map object in the openat_syscall_event_on_all_cpus test
  perf evsel: Free evsel->counts in perf_evsel__exit()
  perf hist: Add missing map__put() in error case
  perf top: Fix error handling in cmd_top()
  perf build-id: Fix memory leak in print_sdt_events()
  perf config: Fix a memory leak in collect_config()
  perf config: Fix an error in the config template documentation
  perf list: Don't forget to drop the reference to the allocated thread_map
  tools/power turbostat: return the exit status of a command
  x86/mm: Don't leak kernel addresses
  scsi: iscsi: flush running unbind operations when removing a session
  thermal/intel_powerclamp: fix truncated kthread name
  thermal/int340x_thermal: fix mode setting
  thermal/int340x_thermal: Add additional UUIDs
  thermal: bcm2835: Fix crash in bcm2835_thermal_debugfs
  thermal/intel_powerclamp: fix __percpu declaration of worker_data
  ALSA: opl3: fix mismatch between snd_opl3_drum_switch definition and declaration
  mmc: davinci: remove extraneous __init annotation
  IB/mlx4: Fix race condition between catas error reset and aliasguid flows
  auxdisplay: hd44780: Fix memory leak on ->remove()
  ALSA: sb8: add a check for request_region
  ALSA: echoaudio: add a check for ioremap_nocache
  ext4: report real fs size after failed resize
  ext4: add missing brelse() in add_new_gdb_meta_bg()
  perf/core: Restore mmap record type correctly
  arc: hsdk_defconfig: Enable CONFIG_BLK_DEV_RAM
  ARC: u-boot args: check that magic number is correct
  ANDROID: cuttlefish_defconfig: Enable L2TP/PPTP
  ANDROID: Makefile: Properly resolve 4.14.112 merge
  Make arm64 serial port config compatible with crosvm
  Linux 4.14.112
  arm64: dts: rockchip: Fix vcc_host1_5v GPIO polarity on rk3328-rock64
  arm64: dts: rockchip: fix vcc_host1_5v pin assign on rk3328-rock64
  dm table: propagate BDI_CAP_STABLE_WRITES to fix sporadic checksum errors
  PCI: Add function 1 DMA alias quirk for Marvell 9170 SATA controller
  x86/perf/amd: Remove need to check "running" bit in NMI handler
  x86/perf/amd: Resolve NMI latency issues for active PMCs
  x86/perf/amd: Resolve race condition when disabling PMC
  xtensa: fix return_address
  sched/fair: Do not re-read ->h_load_next during hierarchical load calculation
  xen: Prevent buffer overflow in privcmd ioctl
  arm64: backtrace: Don't bother trying to unwind the userspace stack
  arm64: dts: rockchip: fix rk3328 rgmii high tx error rate
  arm64: futex: Fix FUTEX_WAKE_OP atomic ops with non-zero result value
  ARM: dts: at91: Fix typo in ISC_D0 on PC9
  ARM: dts: am335x-evm: Correct the regulators for the audio codec
  ARM: dts: am335x-evmsk: Correct the regulators for the audio codec
  virtio: Honour 'may_reduce_num' in vring_create_virtqueue
  genirq: Initialize request_mutex if CONFIG_SPARSE_IRQ=n
  genirq: Respect IRQCHIP_SKIP_SET_WAKE in irq_chip_set_wake_parent()
  block: fix the return errno for direct IO
  block: do not leak memory in bio_copy_user_iov()
  btrfs: prop: fix vanished compression property after failed set
  btrfs: prop: fix zstd compression parameter validation
  Btrfs: do not allow trimming when a fs is mounted with the nologreplay option
  ASoC: fsl_esai: fix channel swap issue when stream starts
  include/linux/bitrev.h: fix constant bitrev
  drm/udl: add a release method and delay modeset teardown
  alarmtimer: Return correct remaining time
  parisc: regs_return_value() should return gpr28
  parisc: Detect QEMU earlier in boot process
  arm64: dts: rockchip: fix rk3328 sdmmc0 write errors
  hv_netvsc: Fix unwanted wakeup after tx_disable
  ip6_tunnel: Match to ARPHRD_TUNNEL6 for dev type
  ALSA: seq: Fix OOB-reads from strlcpy
  net: ethtool: not call vzalloc for zero sized memory request
  netns: provide pure entropy for net_hash_mix()
  net/sched: act_sample: fix divide by zero in the traffic path
  bnxt_en: Reset device on RX buffer errors.
  bnxt_en: Improve RX consumer index validity check.
  nfp: validate the return code from dev_queue_xmit()
  net/mlx5e: Add a lock on tir list
  net/mlx5e: Fix error handling when refreshing TIRs
  vrf: check accept_source_route on the original netdevice
  tcp: Ensure DCTCP reacts to losses
  sctp: initialize _pad of sockaddr_in before copying to user memory
  qmi_wwan: add Olicard 600
  openvswitch: fix flow actions reallocation
  net/sched: fix ->get helper of the matchall cls
  net: rds: force to destroy connection if t_sock is NULL in rds_tcp_kill_sock().
  net/mlx5: Decrease default mr cache size
  net-gro: Fix GRO flush when receiving a GSO packet.
  kcm: switch order of device registration to fix a crash
  ipv6: sit: reset ip header pointer in ipip6_rcv
  ipv6: Fix dangling pointer when ipv6 fragment
  tty: ldisc: add sysctl to prevent autoloading of ldiscs
  tty: mark Siemens R3964 line discipline as BROKEN
  arm64: kaslr: Reserve size of ARM64_MEMSTART_ALIGN in linear region
  stating: ccree: revert "staging: ccree: fix leak of import() after init()"
  lib/string.c: implement a basic bcmp
  x86/vdso: Drop implicit common-page-size linker flag
  x86: vdso: Use $LD instead of $CC to link
  kbuild: clang: choose GCC_TOOLCHAIN_DIR not on LD
  powerpc/tm: Limit TM code inside PPC_TRANSACTIONAL_MEM
  drm/i915/gvt: do not let pin count of shadow mm go negative
  x86/power: Make restore_processor_context() sane
  x86/power/32: Move SYSENTER MSR restoration to fix_processor_context()
  x86/power/64: Use struct desc_ptr for the IDT in struct saved_context
  x86/power: Fix some ordering bugs in __restore_processor_context()
  net: sfp: move sfp_register_socket call from sfp_remove to sfp_probe
  Revert "CHROMIUM: dm: boot time specification of dm="
  Revert "ANDROID: dm: do_mounts_dm: Rebase on top of 4.9"
  Revert "ANDROID: dm: do_mounts_dm: fix dm_substitute_devices()"
  Revert "ANDROID: dm: do_mounts_dm: Update init/do_mounts_dm.c to the latest ChromiumOS version."
  sched/fair: remove printk while schedule is in progress
  ANDROID: Makefile: Add '-fsplit-lto-unit' to cfi-clang-flags
  ANDROID: cfi: Remove unused variable in ptr_to_check_fn
  ANDROID: cuttlefish_defconfig: Enable CONFIG_FUSE_FS

Conflicts:
        arch/arm64/kernel/traps.c
        drivers/mmc/host/sdhci.c
        drivers/mmc/host/sdhci.h
        drivers/tty/Kconfig
        kernel/sched/fair.c

Change-Id: Ic4c01204f58cdb536e2cab04e4f1a2451977f6a3
Signed-off-by: Blagovest Kolenichev <bkolenichev@codeaurora.org>
519 lines
13 KiB
C
/*
 * inet fragments management
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Pavel Emelyanov <xemul@openvz.org>
 *		Started as consolidation of ipv4/ip_fragment.c,
 *		ipv6/reassembly. and ipv6 nf conntrack reassembly
 */

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>

#include <net/sock.h>
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
#include <net/ipv6.h>

/* Use skb->cb to track consecutive/adjacent fragments coming at
 * the end of the queue. Nodes in the rb-tree queue will
 * contain "runs" of one or more adjacent fragments.
 *
 * Invariants:
 * - next_frag is NULL at the tail of a "run";
 * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
 */
struct ipfrag_skb_cb {
        union {
                struct inet_skb_parm    h4;
                struct inet6_skb_parm   h6;
        };
        struct sk_buff          *next_frag;
        int                     frag_run_len;
};

#define FRAG_CB(skb)            ((struct ipfrag_skb_cb *)((skb)->cb))

static void fragcb_clear(struct sk_buff *skb)
{
        RB_CLEAR_NODE(&skb->rbnode);
        FRAG_CB(skb)->next_frag = NULL;
        FRAG_CB(skb)->frag_run_len = skb->len;
}

/* Append skb to the last "run". */
static void fragrun_append_to_last(struct inet_frag_queue *q,
                                   struct sk_buff *skb)
{
        fragcb_clear(skb);

        FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
        FRAG_CB(q->fragments_tail)->next_frag = skb;
        q->fragments_tail = skb;
}

/* Create a new "run" with the skb. */
static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb)
{
        BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
        fragcb_clear(skb);

        if (q->last_run_head)
                rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
                             &q->last_run_head->rbnode.rb_right);
        else
                rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
        rb_insert_color(&skb->rbnode, &q->rb_fragments);

        q->fragments_tail = skb;
        q->last_run_head = skb;
}

/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
 * Value : 0xff if frame should be dropped.
 *         0 or INET_ECN_CE value, to be ORed in to final iph->tos field
 */
const u8 ip_frag_ecn_table[16] = {
        /* at least one fragment had CE, and others ECT_0 or ECT_1 */
        [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0]                      = INET_ECN_CE,
        [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1]                      = INET_ECN_CE,
        [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1]   = INET_ECN_CE,

        /* invalid combinations : drop frame */
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
};
EXPORT_SYMBOL(ip_frag_ecn_table);
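
/* Editor's note: illustrative sketch, not part of the upstream file.  Each
 * fragment contributes one IPFRAG_ECN_* bit (derived from its ECN codepoint),
 * and the reassembler ORs those bits into a per-queue accumulator.  The table
 * above then maps the accumulated set either to a drop verdict (0xff) or to
 * the ECN bits to OR back into the reassembled header.  The helper name and
 * the "tos" parameter below are hypothetical.
 */
static bool __maybe_unused example_apply_frag_ecn(u8 *tos, u8 ecn_bits)
{
        u8 ecn = ip_frag_ecn_table[ecn_bits & 0xf];

        if (ecn == 0xff)        /* e.g. NOT_ECT mixed with ECT/CE: drop */
                return false;
        *tos |= ecn;            /* 0, or INET_ECN_CE if any fragment had CE */
        return true;
}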

int inet_frags_init(struct inet_frags *f)
{
        f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
                                            NULL);
        if (!f->frags_cachep)
                return -ENOMEM;

        return 0;
}
EXPORT_SYMBOL(inet_frags_init);

void inet_frags_fini(struct inet_frags *f)
{
        /* We must wait that all inet_frag_destroy_rcu() have completed. */
        rcu_barrier();

        kmem_cache_destroy(f->frags_cachep);
        f->frags_cachep = NULL;
}
EXPORT_SYMBOL(inet_frags_fini);

static void inet_frags_free_cb(void *ptr, void *arg)
{
        struct inet_frag_queue *fq = ptr;

        /* If we can not cancel the timer, it means this frag_queue
         * is already disappearing, we have nothing to do.
         * Otherwise, we own a refcount until the end of this function.
         */
        if (!del_timer(&fq->timer))
                return;

        spin_lock_bh(&fq->lock);
        if (!(fq->flags & INET_FRAG_COMPLETE)) {
                fq->flags |= INET_FRAG_COMPLETE;
                refcount_dec(&fq->refcnt);
        }
        spin_unlock_bh(&fq->lock);

        inet_frag_put(fq);
}

void inet_frags_exit_net(struct netns_frags *nf)
{
        nf->high_thresh = 0; /* prevent creation of new frags */

        rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
}
EXPORT_SYMBOL(inet_frags_exit_net);

void inet_frag_kill(struct inet_frag_queue *fq)
{
        if (del_timer(&fq->timer))
                refcount_dec(&fq->refcnt);

        if (!(fq->flags & INET_FRAG_COMPLETE)) {
                struct netns_frags *nf = fq->net;

                fq->flags |= INET_FRAG_COMPLETE;
                rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
                refcount_dec(&fq->refcnt);
        }
}
EXPORT_SYMBOL(inet_frag_kill);

static void inet_frag_destroy_rcu(struct rcu_head *head)
{
        struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
                                                 rcu);
        struct inet_frags *f = q->net->f;

        if (f->destructor)
                f->destructor(q);
        kmem_cache_free(f->frags_cachep, q);
}

unsigned int inet_frag_rbtree_purge(struct rb_root *root)
{
        struct rb_node *p = rb_first(root);
        unsigned int sum = 0;

        while (p) {
                struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);

                p = rb_next(p);
                rb_erase(&skb->rbnode, root);
                while (skb) {
                        struct sk_buff *next = FRAG_CB(skb)->next_frag;

                        sum += skb->truesize;
                        kfree_skb(skb);
                        skb = next;
                }
        }
        return sum;
}
EXPORT_SYMBOL(inet_frag_rbtree_purge);

void inet_frag_destroy(struct inet_frag_queue *q)
{
        struct sk_buff *fp;
        struct netns_frags *nf;
        unsigned int sum, sum_truesize = 0;
        struct inet_frags *f;

        WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
        WARN_ON(del_timer(&q->timer) != 0);

        /* Release all fragment data. */
        fp = q->fragments;
        nf = q->net;
        f = nf->f;
        if (fp) {
                do {
                        struct sk_buff *xp = fp->next;

                        sum_truesize += fp->truesize;
                        kfree_skb(fp);
                        fp = xp;
                } while (fp);
        } else {
                sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
        }
        sum = sum_truesize + f->qsize;

        call_rcu(&q->rcu, inet_frag_destroy_rcu);

        sub_frag_mem_limit(nf, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);

static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
                                               struct inet_frags *f,
                                               void *arg)
{
        struct inet_frag_queue *q;

        if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
                return NULL;

        q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
        if (!q)
                return NULL;

        q->net = nf;
        f->constructor(q, arg);
        add_frag_mem_limit(nf, f->qsize);

        timer_setup(&q->timer, f->frag_expire, 0);
        spin_lock_init(&q->lock);
        refcount_set(&q->refcnt, 3);

        return q;
}

static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
                                                void *arg,
                                                struct inet_frag_queue **prev)
{
        struct inet_frags *f = nf->f;
        struct inet_frag_queue *q;

        q = inet_frag_alloc(nf, f, arg);
        if (!q) {
                *prev = ERR_PTR(-ENOMEM);
                return NULL;
        }
        mod_timer(&q->timer, jiffies + nf->timeout);

        *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
                                                 &q->node, f->rhash_params);
        if (*prev) {
                q->flags |= INET_FRAG_COMPLETE;
                inet_frag_kill(q);
                inet_frag_destroy(q);
                return NULL;
        }
        return q;
}

/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
{
        struct inet_frag_queue *fq = NULL, *prev;

        rcu_read_lock();

        prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
        if (!prev)
                fq = inet_frag_create(nf, key, &prev);
        if (prev && !IS_ERR(prev)) {
                fq = prev;
                if (!refcount_inc_not_zero(&fq->refcnt))
                        fq = NULL;
        }
        rcu_read_unlock();
        return fq;
}
EXPORT_SYMBOL(inet_frag_find);
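
/* Editor's note: illustrative comment, not part of the upstream file.  The
 * refcount_set(&q->refcnt, 3) in inet_frag_alloc() above corresponds to the
 * three holders of a live queue:
 *   1. the rhashtable entry (released by inet_frag_kill());
 *   2. the expiry timer (released when the timer is cancelled, or by the
 *      protocol's frag_expire handler once it has run);
 *   3. the pointer handed back to the caller of inet_frag_find(), which the
 *      caller drops with inet_frag_put() when it is done.
 * When the last reference goes away, inet_frag_destroy() frees the queue via
 * call_rcu().
 */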

int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
                           int offset, int end)
{
        struct sk_buff *last = q->fragments_tail;

        /* RFC5722, Section 4, amended by Errata ID : 3089
         *   When reassembling an IPv6 datagram, if
         *   one or more its constituent fragments is determined to be an
         *   overlapping fragment, the entire datagram (and any constituent
         *   fragments) MUST be silently discarded.
         *
         * Duplicates, however, should be ignored (i.e. skb dropped, but the
         * queue/fragments kept for later reassembly).
         */
        if (!last)
                fragrun_create(q, skb);  /* First fragment. */
        else if (last->ip_defrag_offset + last->len < end) {
                /* This is the common case: skb goes to the end. */
                /* Detect and discard overlaps. */
                if (offset < last->ip_defrag_offset + last->len)
                        return IPFRAG_OVERLAP;
                if (offset == last->ip_defrag_offset + last->len)
                        fragrun_append_to_last(q, skb);
                else
                        fragrun_create(q, skb);
        } else {
                /* Binary search. Note that skb can become the first fragment,
                 * but not the last (covered above).
                 */
                struct rb_node **rbn, *parent;

                rbn = &q->rb_fragments.rb_node;
                do {
                        struct sk_buff *curr;
                        int curr_run_end;

                        parent = *rbn;
                        curr = rb_to_skb(parent);
                        curr_run_end = curr->ip_defrag_offset +
                                       FRAG_CB(curr)->frag_run_len;
                        if (end <= curr->ip_defrag_offset)
                                rbn = &parent->rb_left;
                        else if (offset >= curr_run_end)
                                rbn = &parent->rb_right;
                        else if (offset >= curr->ip_defrag_offset &&
                                 end <= curr_run_end)
                                return IPFRAG_DUP;
                        else
                                return IPFRAG_OVERLAP;
                } while (*rbn);
                /* Here we have parent properly set, and rbn pointing to
                 * one of its NULL left/right children. Insert skb.
                 */
                fragcb_clear(skb);
                rb_link_node(&skb->rbnode, parent, rbn);
                rb_insert_color(&skb->rbnode, &q->rb_fragments);
        }

        skb->ip_defrag_offset = offset;

        return IPFRAG_OK;
}
EXPORT_SYMBOL(inet_frag_queue_insert);
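
/* Editor's note: illustrative comment, not part of the upstream file.  A
 * worked example of the verdicts returned above.  Suppose the queue already
 * holds two fragments covering [0, 1280) and [2560, 3840) (offset/end in
 * bytes, end exclusive):
 *   - offset=1280, end=2560: fits in the gap, inserted as its own node,
 *     IPFRAG_OK;
 *   - offset=3840, end=4000: lands right after the tail, appended to the
 *     last run, IPFRAG_OK;
 *   - offset=2560, end=3840: entirely inside an existing run, IPFRAG_DUP
 *     (drop only this skb, keep the queue);
 *   - offset=1200, end=2000: partially overlaps [0, 1280), IPFRAG_OVERLAP
 *     (per RFC 5722 the caller discards the whole queue).
 */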

void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
                              struct sk_buff *parent)
{
        struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
        struct sk_buff **nextp;
        int delta;

        if (head != skb) {
                fp = skb_clone(skb, GFP_ATOMIC);
                if (!fp)
                        return NULL;
                FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
                if (RB_EMPTY_NODE(&skb->rbnode))
                        FRAG_CB(parent)->next_frag = fp;
                else
                        rb_replace_node(&skb->rbnode, &fp->rbnode,
                                        &q->rb_fragments);
                if (q->fragments_tail == skb)
                        q->fragments_tail = fp;
                skb_morph(skb, head);
                FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
                rb_replace_node(&head->rbnode, &skb->rbnode,
                                &q->rb_fragments);
                consume_skb(head);
                head = skb;
        }
        WARN_ON(head->ip_defrag_offset != 0);

        delta = -head->truesize;

        /* Head of list must not be cloned. */
        if (skb_unclone(head, GFP_ATOMIC))
                return NULL;

        delta += head->truesize;
        if (delta)
                add_frag_mem_limit(q->net, delta);

        /* If the first fragment is fragmented itself, we split
         * it to two chunks: the first with data and paged part
         * and the second, holding only fragments.
         */
        if (skb_has_frag_list(head)) {
                struct sk_buff *clone;
                int i, plen = 0;

                clone = alloc_skb(0, GFP_ATOMIC);
                if (!clone)
                        return NULL;
                skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
                skb_frag_list_init(head);
                for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
                        plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
                clone->data_len = head->data_len - plen;
                clone->len = clone->data_len;
                head->truesize += clone->truesize;
                clone->csum = 0;
                clone->ip_summed = head->ip_summed;
                add_frag_mem_limit(q->net, clone->truesize);
                skb_shinfo(head)->frag_list = clone;
                nextp = &clone->next;
        } else {
                nextp = &skb_shinfo(head)->frag_list;
        }

        return nextp;
}
EXPORT_SYMBOL(inet_frag_reasm_prepare);

void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
                            void *reasm_data)
{
        struct sk_buff **nextp = (struct sk_buff **)reasm_data;
        struct rb_node *rbn;
        struct sk_buff *fp;

        skb_push(head, head->data - skb_network_header(head));

        /* Traverse the tree in order, to build frag_list. */
        fp = FRAG_CB(head)->next_frag;
        rbn = rb_next(&head->rbnode);
        rb_erase(&head->rbnode, &q->rb_fragments);
        while (rbn || fp) {
                /* fp points to the next sk_buff in the current run;
                 * rbn points to the next run.
                 */
                /* Go through the current run. */
                while (fp) {
                        *nextp = fp;
                        nextp = &fp->next;
                        fp->prev = NULL;
                        memset(&fp->rbnode, 0, sizeof(fp->rbnode));
                        fp->sk = NULL;
                        head->data_len += fp->len;
                        head->len += fp->len;
                        if (head->ip_summed != fp->ip_summed)
                                head->ip_summed = CHECKSUM_NONE;
                        else if (head->ip_summed == CHECKSUM_COMPLETE)
                                head->csum = csum_add(head->csum, fp->csum);
                        head->truesize += fp->truesize;
                        fp = FRAG_CB(fp)->next_frag;
                }
                /* Move to the next run. */
                if (rbn) {
                        struct rb_node *rbnext = rb_next(rbn);

                        fp = rb_to_skb(rbn);
                        rb_erase(rbn, &q->rb_fragments);
                        rbn = rbnext;
                }
        }
        sub_frag_mem_limit(q->net, head->truesize);

        *nextp = NULL;
        head->next = NULL;
        head->prev = NULL;
        head->tstamp = q->stamp;
}
EXPORT_SYMBOL(inet_frag_reasm_finish);

struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
{
        struct sk_buff *head;

        if (q->fragments) {
                head = q->fragments;
                q->fragments = head->next;
        } else {
                struct sk_buff *skb;

                head = skb_rb_first(&q->rb_fragments);
                if (!head)
                        return NULL;
                skb = FRAG_CB(head)->next_frag;
                if (skb)
                        rb_replace_node(&head->rbnode, &skb->rbnode,
                                        &q->rb_fragments);
                else
                        rb_erase(&head->rbnode, &q->rb_fragments);
                memset(&head->rbnode, 0, sizeof(head->rbnode));
                barrier();
        }
        if (head == q->fragments_tail)
                q->fragments_tail = NULL;

        sub_frag_mem_limit(q->net, head->truesize);

        return head;
}
EXPORT_SYMBOL(inet_frag_pull_head);
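
/* Editor's note: a minimal, hypothetical sketch (not part of the upstream
 * file) of how a protocol reassembler is expected to drive the helpers
 * exported above, in the spirit of ip_frag_queue()/ip_frag_reasm() after the
 * "net: IP defrag: encapsulate rbtree defrag code into callable functions"
 * backport listed in the merge log.  The example_* name, the argument list
 * and the simplified error handling are invented; the caller is assumed to
 * hold q->lock, as ip_defrag() does.
 */
static int __maybe_unused example_frag_queue(struct inet_frag_queue *q,
                                             struct sk_buff *skb,
                                             int offset, int end, bool is_last)
{
        struct sk_buff *prev_tail = q->fragments_tail;
        int err;

        err = inet_frag_queue_insert(q, skb, offset, end);
        if (err == IPFRAG_DUP) {
                /* Exact duplicate: drop this skb, keep the queue. */
                kfree_skb(skb);
                return -EINVAL;
        }
        if (err == IPFRAG_OVERLAP) {
                /* RFC 5722: an overlap poisons the whole datagram. */
                inet_frag_kill(q);
                kfree_skb(skb);
                return -EINVAL;
        }

        /* Accounting the real callers keep per accepted fragment. */
        q->stamp = skb->tstamp;
        q->meat += skb->len;
        add_frag_mem_limit(q->net, skb->truesize);
        if (offset == 0)
                q->flags |= INET_FRAG_FIRST_IN;
        if (is_last) {
                q->flags |= INET_FRAG_LAST_IN;
                q->len = end;   /* total datagram length is now known */
        }

        /* All bytes present: unhash the queue and build the frag_list on
         * skb, which becomes the reassembled packet handed up the stack.
         */
        if (q->flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
            q->meat == q->len) {
                void *reasm_data;

                inet_frag_kill(q);
                reasm_data = inet_frag_reasm_prepare(q, skb, prev_tail);
                if (!reasm_data)
                        return -ENOMEM;
                inet_frag_reasm_finish(q, skb, reasm_data);
        }
        return 0;
}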