Commit 44476f1a authored by David Woodhouse's avatar David Woodhouse

Revert LZS optimisation experiments

They weren't helping. With the 65536-byte packet tests (1500 would perhaps
be more representative but didn't make them look any better):

a99260d6 Add LZS test harness
Samples: 6K of event 'cycles', Event count (approx.): 5627230524
Overhead       Samples  Command  Shared Object      Symbol
  69.71%          4498  lzstest  lzstest            [.] lzs_compress
  12.91%           834  lzstest  lzstest            [.] lzs_decompress

69156824 Reduce per-packet computation overhead for LZS compression
Samples: 7K of event 'cycles', Event count (approx.): 6190866454
Overhead       Samples  Command  Shared Object      Symbol
  71.37%          5050  lzstest  lzstest            [.] lzs_compress
  11.67%           826  lzstest  lzstest            [.] lzs_decompress

64d6b19f Simplify LZS compression again
Samples: 6K of event 'cycles', Event count (approx.): 6074401505
Overhead       Samples  Command  Shared Object      Symbol
  71.27%          4923  lzstest  lzstest            [.] lzs_compress
  11.54%           797  lzstest  lzstest            [.] lzs_decompress

The memset() just isn't as expensive as the other tricks we play to
avoid it. So let's just stick with the simple version for now, unless
someone comes up with some better ideas.
Signed-off-by: default avatarDavid Woodhouse <David.Woodhouse@intel.com>
parent 64d6b19f
......@@ -575,18 +575,9 @@ int openconnect_make_cstp_connection(struct openconnect_info *vpninfo)
if (ret)
goto out;
if (vpninfo->cstp_compr == COMPR_LZS) {
if (!vpninfo->lzs_state)
vpninfo->lzs_state = alloc_lzs_state();
if (!vpninfo->lzs_state) {
vpn_progress(vpninfo, PRG_ERR, _("Compression setup failed\n"));
ret = -ENOMEM;
goto out;
}
/* This will definitely be smaller than zlib's */
/* This will definitely be smaller than zlib's */
if (vpninfo->cstp_compr == COMPR_LZS)
deflate_bufsize = vpninfo->ip_info.mtu;
}
/* If deflate compression is enabled (which is CSTP-only), it needs its
* context to be allocated. */
......@@ -1111,8 +1102,7 @@ int cstp_mainloop(struct openconnect_info *vpninfo, int *timeout)
vpninfo->pending_deflated_pkt = this;
vpninfo->current_ssl_pkt = vpninfo->deflate_pkt;
} else if (vpninfo->cstp_compr == COMPR_LZS) {
ret = lzs_compress(vpninfo->lzs_state,
vpninfo->deflate_pkt->data, this->len,
ret = lzs_compress(vpninfo->deflate_pkt->data, this->len,
this->data, this->len);
if (ret < 0)
goto uncompr; /* It only ever returns -EFBIG */
......
......@@ -860,8 +860,7 @@ int dtls_mainloop(struct openconnect_info *vpninfo, int *timeout)
if (vpninfo->dtls_compr & COMPR_LZS && this->len > 0x40 &&
vpninfo->current_ssl_pkt != vpninfo->deflate_pkt) {
ret = lzs_compress(vpninfo->lzs_state,
vpninfo->deflate_pkt->data, this->len,
ret = lzs_compress(vpninfo->deflate_pkt->data, this->len,
this->data, this->len);
if (ret > 0) {
send_pkt = vpninfo->deflate_pkt;
......
......@@ -291,7 +291,7 @@ void openconnect_vpninfo_free(struct openconnect_info *vpninfo)
/* These check strm->state so they are safe to call multiple times */
inflateEnd(&vpninfo->inflate_strm);
deflateEnd(&vpninfo->deflate_strm);
free(vpninfo->lzs_state);
free(vpninfo->deflate_pkt);
free(vpninfo->tun_pkt);
free(vpninfo->dtls_pkt);
......
......@@ -158,7 +158,16 @@ do { \
* Much of the compression algorithm used here is based very loosely on ideas
* from isdn_lzscomp.c by Andre Beck: http://micky.ibh.de/~beck/stuff/lzs4i4l/
*/
struct lzs_state {
int lzs_compress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen)
{
int inpos = 0;
uint32_t match_len;
uint32_t hash;
uint16_t hofs, longest_match_len, longest_match_ofs;
int outpos = 0;
uint32_t outbits = 0;
int nr_outbits = 0;
/*
* Each pair of bytes from the input is hashed into a hash value of
* size HASH_BITS (currently 12 bits). We could use 16 bits and stop
......@@ -186,71 +195,32 @@ struct lzs_state {
* offset will yield the previous offset at which the same data hash
* value was found.
*/
#define MAX_HISTORY (1ULL<<11) /* Highest offset LZS can represent is 11 bits */
#define MAX_HISTORY (1<<11) /* Highest offset LZS can represent is 11 bits */
uint16_t hash_chain[MAX_HISTORY];
};
struct lzs_state *alloc_lzs_state(void)
{
struct lzs_state *lzs = malloc(sizeof(*lzs));
if (!lzs)
return NULL;
memset(lzs->hash_table, 0xff, sizeof(lzs->hash_table));
memset(lzs->hash_chain, 0xff, sizeof(lzs->hash_chain));
return lzs;
}
int lzs_compress(struct lzs_state *lzs, unsigned char *dst, int dstlen,
const unsigned char *src, int srclen)
{
int inpos = 0;
uint32_t match_len;
uint32_t hash;
uint32_t hofs, longest_match_len, longest_match_ofs;
int outpos = 0;
uint32_t outbits = 0;
int nr_outbits = 0;
/* Just in case anyone tries to use this in a more general-purpose
* scenario... */
if (srclen > 0x10000)
if (srclen > INVALID_OFS + 1)
return -EFBIG;
/* There are ways we could probably avoid having to do this memset
* each time... */
memset(hash_table, 0xff, sizeof(hash_table));
memset(hash_chain, 0xff, sizeof(hash_chain));
while (inpos < srclen - 1) {
hash = HASH(src + inpos);
hofs = lzs->hash_table[hash];
hofs = hash_table[hash];
longest_match_len = 0;
/* If there is a stale entry in the hash table from a previous
* packet, then clear it. This is what makes it OK to re-use the
* data structures without clearing them each time; the first time
* we see any given hash value in the current packet, we'll reset
* the chain.
*
* We don't have to worry about the contents of hash_chain because
* we should only ever be following links to entries in that which
* *have* been written this time around. */
if (hofs >= inpos || hash != HASH(src + hofs))
hofs = lzs->hash_table[hash] = INVALID_OFS;
/* inpos can never exceed INVALID_OFS-1 so there's no need for an
* explicit check for hofs != INVALID_OFS. */
while (hofs < inpos && hofs + MAX_HISTORY > inpos) {
match_len = find_match_len(src, hofs, inpos,
longest_match_len ? : 2, srclen - inpos);
while (hofs != INVALID_OFS && hofs + MAX_HISTORY > inpos) {
match_len = find_match_len(src, hofs, inpos, longest_match_len ? : 2, srclen - inpos);
if (match_len > longest_match_len) {
longest_match_len = match_len;
longest_match_ofs = hofs;
}
/* Sanity check to prevent looping — we should always be
* working *backwards* */
if (lzs->hash_chain[hofs & (MAX_HISTORY - 1)] >= hofs)
break;
hofs = lzs->hash_chain[hofs & (MAX_HISTORY - 1)];
hofs = hash_chain[hofs & (MAX_HISTORY - 1)];
}
if (longest_match_len) {
/* Output offset, as 7-bit or 11-bit as appropriate */
......@@ -290,12 +260,8 @@ int lzs_compress(struct lzs_state *lzs, unsigned char *dst, int dstlen,
while (longest_match_len--) {
hash = HASH(src + inpos);
/* Don't link to stale hash chains. */
if (hofs >= inpos || hash != HASH(src + hofs))
lzs->hash_chain[inpos & (MAX_HISTORY - 1)] = INVALID_OFS;
else
lzs->hash_chain[inpos & (MAX_HISTORY - 1)] = lzs->hash_table[hash];
lzs->hash_table[hash] = inpos++;
hash_chain[inpos & (MAX_HISTORY - 1)] = hash_table[hash];
hash_table[hash] = inpos++;
}
}
if (inpos < srclen)
......
......@@ -18,10 +18,8 @@
#define __OPENCONNECT_INTERNAL_H__
struct lzs_state;
struct lzs_state *alloc_lzs_state(void);
int lzs_decompress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen);
int lzs_compress(struct lzs_state *lzs, unsigned char *dst, int dstlen, const unsigned char *src, int srclen);
int lzs_compress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen);
#include "lzs.c"
......@@ -40,7 +38,6 @@ int main(void)
unsigned char pktbuf[MAX_PKT + 3];
unsigned char comprbuf[MAX_PKT * 9 / 8 + 2];
unsigned char uncomprbuf[MAX_PKT];
struct lzs_state *lzs = alloc_lzs_state();
srand(0xdeadbeef);
......@@ -64,7 +61,7 @@ int main(void)
*(int *)(pktbuf + j) = r;
}
ret = lzs_compress(lzs, comprbuf, sizeof(comprbuf), pktbuf, pktlen);
ret = lzs_compress(comprbuf, sizeof(comprbuf), pktbuf, pktlen);
if (ret < 0) {
fprintf(stderr, "Compressing packet %d failed: %s\n", i, strerror(-ret));
exit(1);
......
......@@ -194,8 +194,6 @@ struct proxy_auth_state {
char *challenge;
};
struct lzs_state;
struct openconnect_info {
#ifdef HAVE_ICONV
iconv_t ic_legacy_to_utf8;
......@@ -371,8 +369,6 @@ struct openconnect_info {
z_stream deflate_strm;
uint32_t deflate_adler32;
struct lzs_state *lzs_state;
int disable_ipv6;
int reconnect_timeout;
int reconnect_interval;
......@@ -640,9 +636,7 @@ int decompress_and_queue_packet(struct openconnect_info *vpninfo,
/* lzs.c */
int lzs_decompress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen);
int lzs_compress(struct lzs_state *, unsigned char *dst, int dstlen,
const unsigned char *src, int srclen);
struct lzs_state *alloc_lzs_state(void);
int lzs_compress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen);
/* ssl.c */
unsigned string_is_hostname(const char* str);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment