Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #78 from nemomobile-packages/htmlParsing
[qmf] Use QTextDocument to parse html.
  • Loading branch information
VDVsx committed Mar 18, 2015
2 parents 65bb23b + 627b034 commit b6c0d04
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 19 deletions.
56 changes: 37 additions & 19 deletions qmf/src/libraries/qmfclient/qmailmessage.cpp
Expand Up @@ -62,6 +62,9 @@
#include <qtextcodec.h>
#include <QTextCodec>
#include <QtDebug>
#ifdef USE_HTML_PARSER
#include <QTextDocument>
#endif

#include <stdlib.h>
#include <limits.h>
Expand Down Expand Up @@ -8601,51 +8604,66 @@ void QMailMessage::refreshPreview()
{
const int maxPreviewLength = 280;
// TODO: don't load entire body into memory
// TODO: parse html correctly, e.g. closing brackets in quotes in tags
QMailMessagePartContainer *htmlPart= findHtmlContainer();
QMailMessagePartContainer *plainTextPart= findPlainTextContainer();
QString plainText;

if (multipartType() == MultipartRelated && htmlPart) // force taking the html in this case
plainTextPart=0;

if ( plainTextPart && plainTextPart->hasBody()) {
QString plaintext(plainTextPart->body().data());
plaintext.remove(QRegExp("\\[(image|cid):[^\\]]*\\]", Qt::CaseInsensitive));
metaDataImpl()->setPreview(plaintext.left(maxPreviewLength));
plainText = plainTextPart->body().data();
// These markers are not valid html, so strip them before any html parsing
plainText.remove(QRegExp("\\[(image|cid):[^\\]]*\\]", Qt::CaseInsensitive));
} else if (htmlPart && ( multipartType() == MultipartRelated || htmlPart->hasBody())) {
QString markup = htmlPart->body().data();
markup.remove(QRegExp("<\\s*(style|head|form|script)[^<]*<\\s*/\\s*\\1\\s*>", Qt::CaseInsensitive));
markup.remove(QRegExp("<(.)[^>]*>"));
markup.replace("&quot;", "\"", Qt::CaseInsensitive);
markup.replace("&nbsp;", " ", Qt::CaseInsensitive);
markup.replace("&amp;", "&", Qt::CaseInsensitive);
markup.replace("&lt;", "<", Qt::CaseInsensitive);
markup.replace("&gt;", ">", Qt::CaseInsensitive);
plainText = htmlPart->body().data();

#ifndef USE_HTML_PARSER
plainText.remove(QRegExp("<\\s*(style|head|form|script)[^<]*<\\s*/\\s*\\1\\s*>", Qt::CaseInsensitive));
plainText.remove(QRegExp("<(.)[^>]*>"));
plainText.replace("&quot;", "\"", Qt::CaseInsensitive);
plainText.replace("&nbsp;", " ", Qt::CaseInsensitive);
plainText.replace("&amp;", "&", Qt::CaseInsensitive);
plainText.replace("&lt;", "<", Qt::CaseInsensitive);
plainText.replace("&gt;", ">", Qt::CaseInsensitive);

// now replace stuff like "&#1084;"
for (int pos = 0; ; ) {
pos = markup.indexOf("&#", pos);
pos = plainText.indexOf("&#", pos);
if (pos < 0)
break;
int semicolon = markup.indexOf(';', pos+2);
int semicolon = plainText.indexOf(';', pos+2);
if (semicolon < 0) {
++pos;
continue;
}
int code = (markup.mid(pos+2, semicolon-pos-2)).toInt();
int code = (plainText.mid(pos+2, semicolon-pos-2)).toInt();
if (code == 0) {
++pos;
continue;
}
markup.replace(pos, semicolon-pos+1, QChar(code));
plainText.replace(pos, semicolon-pos+1, QChar(code));
}

metaDataImpl()->setPreview(markup.simplified().left(maxPreviewLength));
}

#ifdef USE_HTML_PARSER
metaDataImpl()->setPreview(htmlToPlainText(plainText).left(maxPreviewLength));
#else
metaDataImpl()->setPreview(plainText.left(maxPreviewLength));
#endif
partContainerImpl()->setPreviewDirty(false);
}

#ifdef USE_HTML_PARSER
/*!
    \internal

    Returns the plain-text rendering of \a html.

    The markup is loaded into a QTextDocument and read back as plain text,
    and that result is then run through the same document a second time.
*/
QString QMailMessage::htmlToPlainText(const QString &html)
{
    QTextDocument parser;

    // First pass: strip the real markup and decode entities.
    parser.setHtml(html);
    const QString firstPass = parser.toPlainText();

    // Second pass: entity-escaped tags (e.g: &lt; img src="cenas.png" &gt;)
    // come out of the first pass as literal tags, so parse once more to
    // prevent html injection through them.
    parser.setHtml(firstPass);
    return parser.toPlainText();
}
#endif

/*! \internal */
QMailMessage QMailMessage::fromRfc2822(LongString& ls)
{
Expand Down
3 changes: 3 additions & 0 deletions qmf/src/libraries/qmfclient/qmailmessage.h
Expand Up @@ -814,6 +814,9 @@ class QMF_EXPORT QMailMessage : public QMailMessageMetaData, public QMailMessage

static QMailMessage fromRfc2822(LongString& ls);
void refreshPreview();
#ifdef USE_HTML_PARSER
static QString htmlToPlainText(const QString &html);
#endif

public:
virtual QString preview() const;
Expand Down
5 changes: 5 additions & 0 deletions qmf/src/libraries/qmfclient/qmfclient.pro
Expand Up @@ -17,6 +17,11 @@ win32: {
}

QT = core sql network

contains(DEFINES, USE_HTML_PARSER) {
QT += gui
}

DEPENDPATH += .
INCLUDEPATH += support

Expand Down
12 changes: 12 additions & 0 deletions qmf/src/tools/messageserver/main.cpp
Expand Up @@ -45,13 +45,20 @@
#include <qmaillog.h>
#include <qloggers.h>
#include <signal.h>
#ifdef USE_HTML_PARSER
#include <QtGui>
#endif

#if !defined(NO_SHUTDOWN_SIGNAL_HANDLING) && defined(Q_OS_UNIX)

// Signal handler: logs the number of the received signal and tells the
// application to exit.
//
// exit() is a static member of QCoreApplication that QGuiApplication inherits
// unchanged, so this single call does the same thing whether main() created a
// QCoreApplication or (with USE_HTML_PARSER) a QGuiApplication; no
// preprocessor branch is needed here.
//
// NOTE(review): logging and exit() are presumably not async-signal-safe;
// confirm whether this handler should only set a flag / write to a pipe.
static void shutdown(int n)
{
    qMailLog(Messaging) << "Received signal" << n << ", shutting down.";
    QCoreApplication::exit();
}
#endif

Expand All @@ -66,7 +73,12 @@ static void recreateLoggers(int n)

Q_DECL_EXPORT int main(int argc, char** argv)
{
#ifdef USE_HTML_PARSER
// Needed for html parsing by QTextDocument in qmailmessage.cpp
QGuiApplication app(argc, argv);
#else
QCoreApplication app(argc, argv);
#endif

// This is ~/.config/QtProject/Messageserver.conf
qMailLoggersRecreate("QtProject", "Messageserver", "Msgsrv");
Expand Down
4 changes: 4 additions & 0 deletions qmf/src/tools/messageserver/messageserver.pro
Expand Up @@ -26,6 +26,10 @@ equals(QT_MAJOR_VERSION, 5){
CONFIG += qmfmessageserver qmfclient
QT = core

contains(DEFINES, USE_HTML_PARSER) {
QT += gui
}

!contains(DEFINES,QMF_NO_MESSAGE_SERVICE_EDITOR) {
QT += gui
equals(QT_MAJOR_VERSION, 5): QT += widgets
Expand Down
1 change: 1 addition & 0 deletions rpm/qmf-qt5.spec
Expand Up @@ -134,6 +134,7 @@ This package contains the documentation for Qt Messaging Framework (QMF).
DEFINES+=MESSAGESERVER_PLUGINS \
DEFINES+=QMF_NO_MESSAGE_SERVICE_EDITOR \
DEFINES+=USE_KEEPALIVE \
DEFINES+=USE_HTML_PARSER \
CONFIG+=syslog

make %{?_smp_mflags}
Expand Down

0 comments on commit b6c0d04

Please sign in to comment.