Reduce per-packet computation overhead for LZS compression

Since there is no history preserved from one packet to the next, we don't actually keep a real history. We just use the packet data instead.

However, we can *pretend* we do, and pretend that we keep a full 32 bits (4GiB) of it. The current packet data represents just the latest part of it, and we should never be looking at anything older anyway.

By using 32 bits for the hash offsets, and starting each new packet sufficiently far from the previous packet that it cannot 'see' history that it shouldn't, we can avoid having to clear the entire hash table data structure for each packet. The data structures are now twice as big, but only cleared once in every 65536 packets.

Be a lot more paranoid about the contents of the hash tables now that they are more long-lived, to prevent problems caused by corruption. Now the worst that should happen is you waste some time looking for matches where there are none, even if you inherit complete crap in the data structures.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
sailfishos-mirror · Jan 7, 2015 · 6915682 · 6915682
1 parent a99260d
commit 6915682
Show file tree

Hide file tree

Showing 6 changed files with 85 additions and 34 deletions.
diff --git a/cstp.c b/cstp.c
@@ -575,9 +575,18 @@ int openconnect_make_cstp_connection(struct openconnect_info *vpninfo)
 	if (ret)
 		goto out;
 
-	/* This will definitely be smaller than zlib's */
-	if (vpninfo->cstp_compr == COMPR_LZS)
+	if (vpninfo->cstp_compr == COMPR_LZS) {
+		if (!vpninfo->lzs_state)
+			vpninfo->lzs_state = alloc_lzs_state();
+		if (!vpninfo->lzs_state) {
+			vpn_progress(vpninfo, PRG_ERR, _("Compression setup failed\n"));
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		/* This will definitely be smaller than zlib's */
 		deflate_bufsize = vpninfo->ip_info.mtu;
+	}
 
 	/* If deflate compression is enabled (which is CSTP-only), it needs its
 	 * context to be allocated. */
@@ -1102,7 +1111,8 @@ int cstp_mainloop(struct openconnect_info *vpninfo, int *timeout)
 			vpninfo->pending_deflated_pkt = this;
 			vpninfo->current_ssl_pkt = vpninfo->deflate_pkt;
 		} else if (vpninfo->cstp_compr == COMPR_LZS) {
-			ret = lzs_compress(vpninfo->deflate_pkt->data, this->len,
+			ret = lzs_compress(vpninfo->lzs_state,
+					   vpninfo->deflate_pkt->data, this->len,
 					   this->data, this->len);
 			if (ret < 0)
 				goto uncompr; /* It only ever returns -EFBIG */

diff --git a/dtls.c b/dtls.c
@@ -860,7 +860,8 @@ int dtls_mainloop(struct openconnect_info *vpninfo, int *timeout)
 		if (vpninfo->dtls_compr & COMPR_LZS && this->len > 0x40 &&
 		    vpninfo->current_ssl_pkt != vpninfo->deflate_pkt) {
 
-			ret = lzs_compress(vpninfo->deflate_pkt->data, this->len,
+			ret = lzs_compress(vpninfo->lzs_state,
+					   vpninfo->deflate_pkt->data, this->len,
 					   this->data, this->len);
 			if (ret > 0) {
 				send_pkt = vpninfo->deflate_pkt;

diff --git a/library.c b/library.c
@@ -291,7 +291,7 @@ void openconnect_vpninfo_free(struct openconnect_info *vpninfo)
 	/* These check strm->state so they are safe to call multiple times */
 	inflateEnd(&vpninfo->inflate_strm);
 	deflateEnd(&vpninfo->deflate_strm);
-
+	free(vpninfo->lzs_state);
 	free(vpninfo->deflate_pkt);
 	free(vpninfo->tun_pkt);
 	free(vpninfo->dtls_pkt);

diff --git a/lzs.c b/lzs.c
@@ -158,16 +158,7 @@ do {								\
  * Much of the compression algorithm used here is based very loosely on ideas
  * from isdn_lzscomp.c by Andre Beck: http://micky.ibh.de/~beck/stuff/lzs4i4l/
  */
-int lzs_compress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen)
-{
-	int inpos = 0;
-	uint32_t match_len;
-	uint32_t hash;
-	uint16_t hofs, longest_match_len, longest_match_ofs;
-	int outpos = 0;
-	uint32_t outbits = 0;
-	int nr_outbits = 0;
-
+struct lzs_state {
 	/*
 	 * Each pair of bytes from the input is hashed into a hash value of
 	 * size HASH_BITS (currently 12 bits). We could use 16 bits and stop
@@ -185,8 +176,8 @@ int lzs_compress(unsigned char *dst, int dstlen, const unsigned char *src, int s
 	 * since we know IP packets are limited to 64KiB and we can never be
 	 * *starting* a match at the penultimate byte of the packet.
 	 */
-#define INVALID_OFS 0xffff
-	uint16_t hash_table[HASH_TABLE_SIZE]; /* Buffer offset for first match */
+#define INVALID_OFS 0xffffffff
+	uint32_t hash_table[HASH_TABLE_SIZE]; /* Buffer offset for first match */
 
 	/*
 	 * The second data structure allows us to find the previous occurrences
@@ -195,32 +186,72 @@ int lzs_compress(unsigned char *dst, int dstlen, const unsigned char *src, int s
 	 * offset will yield the previous offset at which the same data hash
 	 * value was found.
 	 */
-#define MAX_HISTORY (1<<11) /* Highest offset LZS can represent is 11 bits */
-	uint16_t hash_chain[MAX_HISTORY];
+#define MAX_HISTORY (1ULL<<11) /* Highest offset LZS can represent is 11 bits */
+	uint32_t hash_chain[MAX_HISTORY];
+	uint32_t virt_ofs;
+
+};
+
+struct lzs_state *alloc_lzs_state(void)
+{
+	struct lzs_state *lzs = malloc(sizeof(*lzs));
+	if (!lzs)
+		return NULL;
+
+	lzs->virt_ofs = 0;
+
+	return lzs;
+}
+
+int lzs_compress(struct lzs_state *lzs, unsigned char *dst, int dstlen,
+		 const unsigned char *src, int srclen)
+{
+	int inpos = 0;
+	uint32_t match_len;
+	uint32_t hash;
+	uint32_t hofs, longest_match_len, longest_match_ofs;
+	int outpos = 0;
+	uint32_t outbits = 0;
+	int nr_outbits = 0;
+	uint32_t pkt_ofs;
 
 	/* Just in case anyone tries to use this in a more general-purpose
 	 * scenario... */
-	if (srclen > INVALID_OFS + 1)
+	if (srclen > 0x10000)
 		return -EFBIG;
 
-	/* There are ways we could probably avoid having to do this memset
-	 * each time... */
-	memset(hash_table, 0xff, sizeof(hash_table));
-	memset(hash_chain, 0xff, sizeof(hash_chain));
+	if (!lzs->virt_ofs) {
+		memset(lzs->hash_table, 0xff, sizeof(lzs->hash_table));
+		memset(lzs->hash_chain, 0xff, sizeof(lzs->hash_chain));
+	}
+	pkt_ofs = lzs->virt_ofs;
+
+	/* Ensure the next packet cannot see any of our history. */
+	lzs->virt_ofs += (srclen + MAX_HISTORY + MAX_HISTORY - 1) & ~(MAX_HISTORY - 1);
 
 	while (inpos < srclen - 1) {
 		hash = HASH(src + inpos);
-		hofs = hash_table[hash];
+		hofs = lzs->hash_table[hash];
 
 		longest_match_len = 0;
 
-		while (hofs != INVALID_OFS && hofs + MAX_HISTORY > inpos) {
-			match_len = find_match_len(src, hofs, inpos, longest_match_len ? : 2, srclen - inpos);
+		/* For a given 32-bit virtual offset to be reasonable, it must
+		   actually fall within the range of the packet that we've seen so
+		   far (i.e. pkt_ofs to pkt_ofs + inpos - 1). It must also not be
+		   further behind pkt_ofs + inpos than MAX_HISTORY */
+		while (hofs != INVALID_OFS && hofs < pkt_ofs + inpos && hofs >= pkt_ofs &&
+		       hofs + MAX_HISTORY > pkt_ofs + inpos) {
+			match_len = find_match_len(src, hofs - pkt_ofs, inpos,
+						   longest_match_len ? : 2, srclen - inpos);
 			if (match_len > longest_match_len) {
 				longest_match_len = match_len;
-				longest_match_ofs = hofs;
+				longest_match_ofs = hofs - pkt_ofs;
 			}
-			hofs = hash_chain[hofs & (MAX_HISTORY - 1)];
+			/* Sanity check to prevent looping — we should always be
+			 * working *backwards* */
+			if (lzs->hash_chain[hofs & (MAX_HISTORY - 1)] >= hofs)
+				break;
+			hofs = lzs->hash_chain[hofs & (MAX_HISTORY - 1)];
 		}
 		if (longest_match_len) {
 			/* Output offset, as 7-bit or 11-bit as appropriate */
@@ -260,8 +291,8 @@ int lzs_compress(unsigned char *dst, int dstlen, const unsigned char *src, int s
 
 		while (longest_match_len--) {
 			hash = HASH(src + inpos);
-			hash_chain[inpos & (MAX_HISTORY - 1)] = hash_table[hash];
-			hash_table[hash] = inpos++;
+			lzs->hash_chain[inpos & (MAX_HISTORY - 1)] = lzs->hash_table[hash];
+			lzs->hash_table[hash] = pkt_ofs + inpos++;
 		}
 	}
 	if (inpos < srclen)

diff --git a/lzstest.c b/lzstest.c
@@ -18,8 +18,10 @@
 
 #define __OPENCONNECT_INTERNAL_H__
 
+struct lzs_state;
+struct lzs_state *alloc_lzs_state(void);
 int lzs_decompress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen);
-int lzs_compress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen);
+int lzs_compress(struct lzs_state *lzs, unsigned char *dst, int dstlen, const unsigned char *src, int srclen);
 
 #include "lzs.c"
 
@@ -38,6 +40,7 @@ int main(void)
 	unsigned char pktbuf[MAX_PKT + 3];
 	unsigned char comprbuf[MAX_PKT * 9 / 8 + 2];
 	unsigned char uncomprbuf[MAX_PKT];
+	struct lzs_state *lzs = alloc_lzs_state();
 
 	srand(0xdeadbeef);
 
@@ -61,7 +64,7 @@ int main(void)
 			*(int *)(pktbuf + j) = r;
 		}		
 
-		ret = lzs_compress(comprbuf, sizeof(comprbuf), pktbuf, pktlen);
+		ret = lzs_compress(lzs, comprbuf, sizeof(comprbuf), pktbuf, pktlen);
 		if (ret < 0) {
 			fprintf(stderr, "Compressing packet %d failed: %s\n", i, strerror(-ret));
 			exit(1);

diff --git a/openconnect-internal.h b/openconnect-internal.h
@@ -194,6 +194,8 @@ struct proxy_auth_state {
 	char *challenge;
 };
 
+struct lzs_state;
+
 struct openconnect_info {
 #ifdef HAVE_ICONV
 	iconv_t ic_legacy_to_utf8;
@@ -369,6 +371,8 @@ struct openconnect_info {
 	z_stream deflate_strm;
 	uint32_t deflate_adler32;
 
+	struct lzs_state *lzs_state;
+
 	int disable_ipv6;
 	int reconnect_timeout;
 	int reconnect_interval;
@@ -636,7 +640,9 @@ int decompress_and_queue_packet(struct openconnect_info *vpninfo,
 
 /* lzs.c */
 int lzs_decompress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen);
-int lzs_compress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen);
+int lzs_compress(struct lzs_state *, unsigned char *dst, int dstlen,
+		 const unsigned char *src, int srclen);
+struct lzs_state *alloc_lzs_state(void);
 
 /* ssl.c */
 unsigned string_is_hostname(const char* str);