Skip to content

Commit

Permalink
Revert LZS optimisation experiments
Browse files Browse the repository at this point in the history
They weren't helping. With the 65536-byte packet tests (1500 would perhaps
be more representative but didn't make them look any better):

a99260d Add LZS test harness
Samples: 6K of event 'cycles', Event count (approx.): 5627230524
Overhead       Samples  Command  Shared Object      Symbol
  69.71%          4498  lzstest  lzstest            [.] lzs_compress
  12.91%           834  lzstest  lzstest            [.] lzs_decompress

6915682 Reduce per-packet computation overhead for LZS compression
Samples: 7K of event 'cycles', Event count (approx.): 6190866454
Overhead       Samples  Command  Shared Object      Symbol
  71.37%          5050  lzstest  lzstest            [.] lzs_compress
  11.67%           826  lzstest  lzstest            [.] lzs_decompress

64d6b19 Simplify LZS compression again
Samples: 6K of event 'cycles', Event count (approx.): 6074401505
Overhead       Samples  Command  Shared Object      Symbol
  71.27%          4923  lzstest  lzstest            [.] lzs_compress
  11.54%           797  lzstest  lzstest            [.] lzs_decompress

The memset() just isn't as expensive as the other tricks we play to
avoid it. So let's just stick with the simple version for now, unless
someone comes up with some better ideas.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
  • Loading branch information
David Woodhouse authored and David Woodhouse committed Jan 7, 2015
1 parent 64d6b19 commit 44476f1
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 85 deletions.
16 changes: 3 additions & 13 deletions cstp.c
Expand Up @@ -575,18 +575,9 @@ int openconnect_make_cstp_connection(struct openconnect_info *vpninfo)
if (ret)
goto out;

if (vpninfo->cstp_compr == COMPR_LZS) {
if (!vpninfo->lzs_state)
vpninfo->lzs_state = alloc_lzs_state();
if (!vpninfo->lzs_state) {
vpn_progress(vpninfo, PRG_ERR, _("Compression setup failed\n"));
ret = -ENOMEM;
goto out;
}

/* This will definitely be smaller than zlib's */
/* This will definitely be smaller than zlib's */
if (vpninfo->cstp_compr == COMPR_LZS)
deflate_bufsize = vpninfo->ip_info.mtu;
}

/* If deflate compression is enabled (which is CSTP-only), it needs its
* context to be allocated. */
Expand Down Expand Up @@ -1111,8 +1102,7 @@ int cstp_mainloop(struct openconnect_info *vpninfo, int *timeout)
vpninfo->pending_deflated_pkt = this;
vpninfo->current_ssl_pkt = vpninfo->deflate_pkt;
} else if (vpninfo->cstp_compr == COMPR_LZS) {
ret = lzs_compress(vpninfo->lzs_state,
vpninfo->deflate_pkt->data, this->len,
ret = lzs_compress(vpninfo->deflate_pkt->data, this->len,
this->data, this->len);
if (ret < 0)
goto uncompr; /* It only ever returns -EFBIG */
Expand Down
3 changes: 1 addition & 2 deletions dtls.c
Expand Up @@ -860,8 +860,7 @@ int dtls_mainloop(struct openconnect_info *vpninfo, int *timeout)
if (vpninfo->dtls_compr & COMPR_LZS && this->len > 0x40 &&
vpninfo->current_ssl_pkt != vpninfo->deflate_pkt) {

ret = lzs_compress(vpninfo->lzs_state,
vpninfo->deflate_pkt->data, this->len,
ret = lzs_compress(vpninfo->deflate_pkt->data, this->len,
this->data, this->len);
if (ret > 0) {
send_pkt = vpninfo->deflate_pkt;
Expand Down
2 changes: 1 addition & 1 deletion library.c
Expand Up @@ -291,7 +291,7 @@ void openconnect_vpninfo_free(struct openconnect_info *vpninfo)
/* These check strm->state so they are safe to call multiple times */
inflateEnd(&vpninfo->inflate_strm);
deflateEnd(&vpninfo->deflate_strm);
free(vpninfo->lzs_state);

free(vpninfo->deflate_pkt);
free(vpninfo->tun_pkt);
free(vpninfo->dtls_pkt);
Expand Down
80 changes: 23 additions & 57 deletions lzs.c
Expand Up @@ -158,7 +158,16 @@ do { \
* Much of the compression algorithm used here is based very loosely on ideas
* from isdn_lzscomp.c by Andre Beck: http://micky.ibh.de/~beck/stuff/lzs4i4l/
*/
struct lzs_state {
int lzs_compress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen)
{
int inpos = 0;
uint32_t match_len;
uint32_t hash;
uint16_t hofs, longest_match_len, longest_match_ofs;
int outpos = 0;
uint32_t outbits = 0;
int nr_outbits = 0;

/*
* Each pair of bytes from the input is hashed into a hash value of
* size HASH_BITS (currently 12 bits). We could use 16 bits and stop
Expand Down Expand Up @@ -186,71 +195,32 @@ struct lzs_state {
* offset will yield the previous offset at which the same data hash
* value was found.
*/
#define MAX_HISTORY (1ULL<<11) /* Highest offset LZS can represent is 11 bits */
#define MAX_HISTORY (1<<11) /* Highest offset LZS can represent is 11 bits */
uint16_t hash_chain[MAX_HISTORY];

};

struct lzs_state *alloc_lzs_state(void)
{
struct lzs_state *lzs = malloc(sizeof(*lzs));
if (!lzs)
return NULL;

memset(lzs->hash_table, 0xff, sizeof(lzs->hash_table));
memset(lzs->hash_chain, 0xff, sizeof(lzs->hash_chain));

return lzs;
}

int lzs_compress(struct lzs_state *lzs, unsigned char *dst, int dstlen,
const unsigned char *src, int srclen)
{
int inpos = 0;
uint32_t match_len;
uint32_t hash;
uint32_t hofs, longest_match_len, longest_match_ofs;
int outpos = 0;
uint32_t outbits = 0;
int nr_outbits = 0;

/* Just in case anyone tries to use this in a more general-purpose
* scenario... */
if (srclen > 0x10000)
if (srclen > INVALID_OFS + 1)
return -EFBIG;

/* There are ways we could probably avoid having to do this memset
* each time... */
memset(hash_table, 0xff, sizeof(hash_table));
memset(hash_chain, 0xff, sizeof(hash_chain));

while (inpos < srclen - 1) {
hash = HASH(src + inpos);
hofs = lzs->hash_table[hash];
hofs = hash_table[hash];

longest_match_len = 0;

/* If there is a stale entry in the hash table from a previous
* packet, then clear it. This is what makes it OK to re-use the
* data structures without clearing them each time; the first time
* we see any given hash value in the current packet, we'll reset
* the chain.
*
* We don't have to worry about the contents of hash_chain because
* we should only ever be following links to entries in that which
* *have* been written this time around. */
if (hofs >= inpos || hash != HASH(src + hofs))
hofs = lzs->hash_table[hash] = INVALID_OFS;

/* inpos can never exceed INVALID_OFS-1 so there's no need for an
* explicit check for hofs != INVALID_OFS. */
while (hofs < inpos && hofs + MAX_HISTORY > inpos) {
match_len = find_match_len(src, hofs, inpos,
longest_match_len ? : 2, srclen - inpos);
while (hofs != INVALID_OFS && hofs + MAX_HISTORY > inpos) {
match_len = find_match_len(src, hofs, inpos, longest_match_len ? : 2, srclen - inpos);
if (match_len > longest_match_len) {
longest_match_len = match_len;
longest_match_ofs = hofs;
}
/* Sanity check to prevent looping — we should always be
* working *backwards* */
if (lzs->hash_chain[hofs & (MAX_HISTORY - 1)] >= hofs)
break;
hofs = lzs->hash_chain[hofs & (MAX_HISTORY - 1)];
hofs = hash_chain[hofs & (MAX_HISTORY - 1)];
}
if (longest_match_len) {
/* Output offset, as 7-bit or 11-bit as appropriate */
Expand Down Expand Up @@ -290,12 +260,8 @@ int lzs_compress(struct lzs_state *lzs, unsigned char *dst, int dstlen,

while (longest_match_len--) {
hash = HASH(src + inpos);
/* Don't link to stale hash chains. */
if (hofs >= inpos || hash != HASH(src + hofs))
lzs->hash_chain[inpos & (MAX_HISTORY - 1)] = INVALID_OFS;
else
lzs->hash_chain[inpos & (MAX_HISTORY - 1)] = lzs->hash_table[hash];
lzs->hash_table[hash] = inpos++;
hash_chain[inpos & (MAX_HISTORY - 1)] = hash_table[hash];
hash_table[hash] = inpos++;
}
}
if (inpos < srclen)
Expand Down
7 changes: 2 additions & 5 deletions lzstest.c
Expand Up @@ -18,10 +18,8 @@

#define __OPENCONNECT_INTERNAL_H__

struct lzs_state;
struct lzs_state *alloc_lzs_state(void);
int lzs_decompress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen);
int lzs_compress(struct lzs_state *lzs, unsigned char *dst, int dstlen, const unsigned char *src, int srclen);
int lzs_compress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen);

#include "lzs.c"

Expand All @@ -40,7 +38,6 @@ int main(void)
unsigned char pktbuf[MAX_PKT + 3];
unsigned char comprbuf[MAX_PKT * 9 / 8 + 2];
unsigned char uncomprbuf[MAX_PKT];
struct lzs_state *lzs = alloc_lzs_state();

srand(0xdeadbeef);

Expand All @@ -64,7 +61,7 @@ int main(void)
*(int *)(pktbuf + j) = r;
}

ret = lzs_compress(lzs, comprbuf, sizeof(comprbuf), pktbuf, pktlen);
ret = lzs_compress(comprbuf, sizeof(comprbuf), pktbuf, pktlen);
if (ret < 0) {
fprintf(stderr, "Compressing packet %d failed: %s\n", i, strerror(-ret));
exit(1);
Expand Down
8 changes: 1 addition & 7 deletions openconnect-internal.h
Expand Up @@ -194,8 +194,6 @@ struct proxy_auth_state {
char *challenge;
};

struct lzs_state;

struct openconnect_info {
#ifdef HAVE_ICONV
iconv_t ic_legacy_to_utf8;
Expand Down Expand Up @@ -371,8 +369,6 @@ struct openconnect_info {
z_stream deflate_strm;
uint32_t deflate_adler32;

struct lzs_state *lzs_state;

int disable_ipv6;
int reconnect_timeout;
int reconnect_interval;
Expand Down Expand Up @@ -640,9 +636,7 @@ int decompress_and_queue_packet(struct openconnect_info *vpninfo,

/* lzs.c */
int lzs_decompress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen);
int lzs_compress(struct lzs_state *, unsigned char *dst, int dstlen,
const unsigned char *src, int srclen);
struct lzs_state *alloc_lzs_state(void);
int lzs_compress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen);

/* ssl.c */
unsigned string_is_hostname(const char* str);
Expand Down

0 comments on commit 44476f1

Please sign in to comment.