/***************************************************************************
                          richtextparser.cpp - parses MSN and Plus messages
                             -------------------
    begin                : April 30, 2008
    copyright            : (C) 2008 by Valerio Pilo
                           (C) 2009 by Sjors Gielen
    email                : valerio@kmess.org
                           dazjorz@dazjorz.com
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#include "richtextparser.h"

#include "../contact/contactbase.h"
#include "../utils/kmessshared.h"
#include "../currentaccount.h"
#include "../emoticonmanager.h"
#include "../kmessdebug.h"

#include <math.h>

#include <QColor>
#include <QRegExp>
#include <QTextDocument>
#include <QUrl>

#include <KLocale>
#include <KStandardDirs>


// Insert an optional line break (<wbr/>) every WORDWRAP_EVERY characters
#define WORDWRAP_EVERY    30


// Initialize the list of MSN Plus colors
// Note that a whole lot of colors are still missing from the list
QStringList RichTextParser::predefinedColors_( QStringList()
  << "FFFFFF" << "000000" << "00007F" << "009300" << "FF0000" << "7F0000" << "9C009C" << "FC7F00"
  << "FFFF00" << "00FC00" << "009393" << "00FFFF" << "2020FC" << "FF00FF" << "7F7F7F" << "D2D2D2"
  << "E7E6E4" << "CFCDD0" << "FFDEA4" << "FFAEB9" << "FFA8FF" << "B4B4FC" << "BAFBE5" << "C1FFA3"
  << "FAFDA2" << "B6B4D7" << "A2A0A1" << "F9C152" << "FF6D66" << "FF62FF" << "6C6CFF" << "68FFC3"
  << "8EFF67" << "F9FF57" << "858482" << "6E6B7D" << "FFA01E" << "F92611" << "FF20FF" << "202BFF"
  << "1EFFA5" << "60F913" << "FFF813" << "5E6464" << "4B494C" << "D98812" << "EB0505" << "DE00DE"
  << "0000D3" << "03CC88" << "59D80D" << "D4C804" << "000268" << "18171C" << "944E00" << "9B0008"
  << "980299" << "01038C" << "01885F" << "389600" << "9A9E15" << "473400" << "4D0000" << "5F0162"
  << "000047" << "06502F" << "1C5300" << "544D05" );

// Initialize the qhash maps
QHash<QString, QString> RichTextParser::cleanedStringsCache_;
QHash<QString, QString> RichTextParser::formattedStringsCache_;

// Initialize the MSN Plus tag matching regexp
QRegExp RichTextParser::colorMatch_( "\\[(c|a)=(#?[0-9a-z]+)\\](.*)\\[/\\1(?:=(#?[0-9a-z]+))?\\]", Qt::CaseInsensitive );

// Set the id counter for emoticons not yet received
int RichTextParser::lastPendingEmoticonId_ = 0;

// Set the pointer to the instance of the Emoticon Manager
EmoticonManager *RichTextParser::emoticonManager_ = 0;



// Strip the MSN Plus! formatting tags out of the given string.
// The string is modified in place; results are memoized in cleanedStringsCache_,
// keyed by the unmodified input.
void RichTextParser::getCleanString( QString &string )
{
  // Without an opening bracket there cannot be any tag to strip
  if( ! string.contains( "[" ) )
  {
    return;
  }

  // Serve the result from the cache when this input has been seen before
  const QHash<QString, QString>::const_iterator cached = cleanedStringsCache_.constFind( string );
  if( cached != cleanedStringsCache_.constEnd() )
  {
    string = cached.value();
    return;
  }

  const QString originalString = string;

  // Remove the bold, italic, underline and strike-through tags, in this order
  static const char * const simpleTags[] = { "[b]", "[/b]", "[i]", "[/i]", "[u]", "[/u]", "[s]", "[/s]" };
  static const int simpleTagsCount = sizeof( simpleTags ) / sizeof( simpleTags[ 0 ] );
  for( int tagIndex = 0; tagIndex < simpleTagsCount; ++tagIndex )
  {
    string.replace( QString( simpleTags[ tagIndex ] ), QString( "" ), Qt::CaseInsensitive );
  }

  // Remove the foreground ([c]) and background ([a]) color tags, with or without a color argument
  const QRegExp colorTags( "\\[/?(c|a)(=#?[0-9a-z,]+)?\\]", Qt::CaseInsensitive );
  string.replace( colorTags, "" );

#ifdef KMESSDEBUG_RICHTEXTPARSER
  kDebug() << "Original:" << originalString;
  kDebug() << "Parsed:" << string;
#endif

  // Remember the result for the next call
  cleanedStringsCache_.insert( originalString, string );

  // Keep the cache within the maximum allowed size by dropping the entry the hash lists first
  if( cleanedStringsCache_.count() > MSN_PLUS_STRINGCACHESIZE )
  {
    const QString evictedKey( cleanedStringsCache_.constBegin().key() );
    cleanedStringsCache_.remove( evictedKey );
  }
}



// Return the given string
with MSN Plus! formatting parsed void RichTextParser::getFormattedString( QString &string ) { // First check if the string does not need modification if( ! string.contains( "[" ) ) { return; } // Check if the string is already in cache if( formattedStringsCache_.contains( string ) ) { string = formattedStringsCache_.value( string ); return; } const QString originalString = string; string.replace( "[b]", "<b>", Qt::CaseInsensitive ) .replace( "[/b]", "</b>", Qt::CaseInsensitive ) .replace( "[i]", "<i>", Qt::CaseInsensitive ) .replace( "[/i]", "</i>", Qt::CaseInsensitive ) .replace( "[u]", "<u>", Qt::CaseInsensitive ) .replace( "[/u]", "</u>", Qt::CaseInsensitive ) .replace( "[s]", "<s>", Qt::CaseInsensitive ) .replace( "[/s]", "</s>", Qt::CaseInsensitive ); while( colorMatch_.indexIn( string ) != -1 ) { bool isForeground = ( colorMatch_.cap( 1 ).toLower() == "c" ); // match a solid color if( colorMatch_.cap( 4 ).isEmpty() ) { string.replace( colorMatch_.pos(), colorMatch_.matchedLength(), "<span style='" + QString( isForeground ? 
"color" : "background-color" ) + ":" + getHtmlColor( colorMatch_.cap( 2 ) ) + ";'>" + colorMatch_.cap( 3 ) + "</span>" ); } // Match a foreground color gradient else if( isForeground ) { string.replace( colorMatch_.pos(), colorMatch_.matchedLength(), getHtmlGradient( colorMatch_.cap( 3 ), colorMatch_.cap( 2 ), colorMatch_.cap( 4 ) ) ); } // Match a background color gradient else { string.replace( colorMatch_.pos(), colorMatch_.matchedLength(), "<span style='background-color:qlineargradient(x1:0,y1:0,x2:1,y2:0," "stop:0 " + getHtmlColor( colorMatch_.cap( 2 ) ) + ",stop:1 " + getHtmlColor( colorMatch_.cap( 4 ) ) + ");'>" + colorMatch_.cap( 3 ) + "</span>" ); } } // Add the parsed string in a tag which is not usually used elsewhere: this drastically reduces parsing problems // originated by, for example, missing closing tags string = "<font>" + string + "</font>"; #ifdef KMESSDEBUG_RICHTEXTPARSER kDebug() << "Original:" << originalString; kDebug() << "Parsed:" << string; #endif // Add this to the cache formattedStringsCache_.insert( originalString, string ); // Keep the queue size to the maximum allowed length if( formattedStringsCache_.count() > MSN_PLUS_STRINGCACHESIZE ) { formattedStringsCache_.remove( formattedStringsCache_.constBegin().key() ); } } // Turns color codes (english color names, RGB triplets, MSN Plus! palette colors) into an HTML RGB color code QString RichTextParser::getHtmlColor( const QString& color ) { bool convertedOk; int colorNum = color.toInt( &convertedOk ); // Find colors in the MSN Plus! 
palette if( convertedOk ) { if( colorNum < predefinedColors_.size() ) { return "#" + predefinedColors_[ colorNum ]; } else { return "#000000"; } } // Find HTML (#RRGGBB/#RGB) or CSS (red,blue) color codes int colorLength = color.length(); if( colorLength > 0 && color[0] == '#' && colorLength != 4 && colorLength != 7 ) { // Pad with zeros incomplete RGB triplets QString paddedColor( color ); paddedColor.append( QString().fill( '0', 7 - colorLength ) ); QColor cssOrHtmlColor( paddedColor ); if( cssOrHtmlColor.isValid() ) { return cssOrHtmlColor.name(); } } else { QColor cssOrHtmlColor( color ); if( cssOrHtmlColor.isValid() ) { return cssOrHtmlColor.name(); } } return "#000000"; /* // These are not used with MSN+ Live, though are still parsed for backwards compatibility // Find arbitrary RGB triplets if( color.contains( "," ) ) { QStringList rgb = color.split( ",", QString::KeepEmptyParts ); QColor rgbColor( rgb[0].toInt(), rgb[1].toInt(), rgb[2].toInt() ); if( rgbColor.isValid() ) { return rgbColor.name(); } } */ } // Turns a string into a gradient colored one, using Qt HTML tags QString RichTextParser::getHtmlGradient( const QString& text, const QString& startColor, const QString& endColor ) { QColor start( getHtmlColor( startColor ) ); QColor end ( getHtmlColor( endColor ) ); // Skip gradienting wrong colors or empty strings if( ! start.isValid() || ! 
end.isValid() || text.isEmpty() ) { return text; } QList<int> tags; QChar character; QColor current = start; QString outputText; unsigned int indexGradient = 0; int indexFullString = 0; unsigned int levels = text.length(); int differenceRed, differenceGreen, differenceBlue, tempRed, tempGreen, tempBlue; QRegExp looseTagsMatch( "<[^>]+>|&[a-z]+;|&#[0-9]+;|\\[/?[ca](=#?[0-9a-z]+)?\\]", Qt::CaseInsensitive ); #ifdef KMESSDEBUG_RICHTEXTPARSER kDebug() << "Text to gradient:" << text; #endif // Search in the text all the tags to skip, and store their positions for faster parsing while( looseTagsMatch.indexIn( text, indexFullString, QRegExp::CaretAtOffset ) > -1 ) { int pos = looseTagsMatch.pos( 0 ); int len = looseTagsMatch.matchedLength(); #ifdef KMESSDEBUG_RICHTEXTPARSER kDebug() << "Adding tag to skip at pos" << pos << "(len" << len << "):" << looseTagsMatch.cap( 0 ); #endif // Add position and length of the tag to allow skipping it later tags << pos; tags << len; // Continue searching after the tag indexFullString = pos + len; // Subtract from the gradient size the tag length: it will be output as one single char levels -= len; } // Calculate the RGB difference between the starting and ending color differenceRed = (int)floor( (float)( start.red () - end.red () ) / (float)levels ); differenceGreen = (int)floor( (float)( start.green() - end.green() ) / (float)levels ); differenceBlue = (int)floor( (float)( start.blue () - end.blue () ) / (float)levels ); #ifdef KMESSDEBUG_RICHTEXTPARSER kDebug() << "Size is" << text.length() <<"(" << levels << "stripped) -" << "Colored from" << start.name() << "to" << end.name() << ", difference: (" << differenceRed << "," << differenceGreen << "," << differenceBlue << ")"; #endif // When the start and end colors are the same, don't waste time creating a gradient with only one color if( differenceRed == 0 && differenceGreen == 0 && differenceBlue == 0 ) { #ifdef KMESSDEBUG_RICHTEXTPARSER kDebug() << "The gradient colors are equal, 
converting it to solid color."; #endif return "<span style=\"color:" + start.name() + ";\">" + text + "</span>"; } // Proceed through the entire original string for( indexFullString = 0; indexFullString < text.length(); ++indexFullString ) { // Get the new values for the current gradient character tempRed = start.red () - ( differenceRed * indexGradient ); tempGreen = start.green() - ( differenceGreen * indexGradient ); tempBlue = start.blue () - ( differenceBlue * indexGradient ); // The values may get out of the limits, and since setRgb() voids the whole RGB color if one of the values is // out of range, we must assure them to be always in range current.setRgb( tempRed < 0 ? 0 : ( tempRed > 255 ? 255 : tempRed ), tempGreen < 0 ? 0 : ( tempGreen > 255 ? 255 : tempGreen ), tempBlue < 0 ? 0 : ( tempBlue > 255 ? 255 : tempBlue ) ); // Match the formatting tags: they must be skipped if( tags.count() > 0 && tags.first() == indexFullString ) { // Remove the tag position and size from the list tags.takeFirst(); int matchLength = tags.takeFirst(); // Output the whole tag as a single color outputText += "<span style=\"color:" + current.name() + ";\">" + text.mid( indexFullString, matchLength ) + "</span>"; #ifdef KMESSDEBUG_RICHTEXTPARSER kDebug() << "Outputting whole tag:" << text.mid( indexFullString, matchLength ); #endif // Move the index on to the tag end indexFullString += matchLength - 1; continue; } // Get the next character to colorize character = text[ indexFullString ]; // Use the <font> to save characters outputText += "<font color='" + current.name() + "'>" + character + "</font>"; indexGradient++; } return outputText; } /** * @brief Initialize the class */ 00377 void RichTextParser::initialize() { colorMatch_.setMinimal( true ); emoticonManager_ = EmoticonManager::instance(); } /** * @brief Replace the very old Messenger Plus characters with HTML markup * * Replace the oldest MSN+ tags with HTML-compatible formatting. 
This kind of formatting * method uses reserved characters - thing which may cause i18n problems - but many MSN+ * commands (most notably, the predefined messages with sound) still use it. * * @param text The text to format. The original string will be lost. */ 00394 void RichTextParser::parseMsnPlusString( QString &text ) { bool boldFlag = false; bool italicFlag = false; bool underlineFlag = false; bool fontFlag = false; QColor color; QRegExp htmlTest( "^\x04""&#?[a-z0-9]+;" ); QRegExp fontCapture = QRegExp( "^\x03""([0-9]{1,2})(,([0-9]{1,2}))?" ); for( int index = 0; index < text.length(); index++ ) { switch( text.at( index ).unicode() ) { case 0x0002: // bold character boldFlag = !boldFlag; text = text.replace( index, 1, ( boldFlag ) ? "<b>" : "</b>" ); index += ( boldFlag ) ? 2 : 3; // Skip the characters we've just added break; case 0x0003: // color character fontFlag = !fontFlag; fontCapture.indexIn( text, index, QRegExp::CaretAtOffset ); color = getHtmlColor( fontCapture.cap(1) ); // Font background text is ignored, as it's impossible to render in Qt's HTML subset if( fontCapture.matchedLength() == -1 ) { // No color found after the special character, close the html tag text = text.replace( index, 1, "</font>" ); index += 6; // Skip the characters we've just added } else { // Font color open text = text.replace( index, fontCapture.matchedLength(), "<font color='" + color.name() + "'>" ); index += 21; // Skip the characters we've just added } break; case 0x0004: // Sound tag: this character is followed by another which identifies the sound ID htmlTest.indexIn( text, index, QRegExp::CaretAtOffset ); if( htmlTest.matchedLength() != -1 ) { // Some sounds IDs are HTML entities: that has to be taken care of, too text = text.replace( index, htmlTest.matchedLength(), "" ); } else { // we need to delete this character and the following one from the string text = text.replace( index, 2, "" ); } // Restart from where we encountered the starting character index -= 1; 
break; case 0x0005: // italic character italicFlag = !italicFlag; text = text.replace( index, 1, ( italicFlag ) ? "<i>" : "</i>" ); index += ( italicFlag ) ? 2 : 3; // Skip the characters we've just added break; case 0x001f: // underline character underlineFlag = !underlineFlag; text = text.replace( index, 1, ( underlineFlag ) ? "<u>" : "</u>" ); index += ( underlineFlag ) ? 2 : 3; // Skip the characters we've just added break; } } // Close any tag still open. Hopefully, the parser will not complain too much if the closing order is wrong if( boldFlag ) text.append( "</b>" ); if( italicFlag ) text.append( "</i>" ); if( underlineFlag ) text.append( "</u>" ); if( fontFlag ) text.append( "</font>" ); } /** * Transform a string into its rich text form * * This is the one-stop shop for text parsing. This method is capable of single-pass conversion of * many things in their rich text equivalents: * - web links are made clickable, even geek-style "kmess.org" links and email addresses * - emoticons shortcuts are transformed in HTML image tags, even custom ones, and not yet received ones * - MSN Plus formatting is turned into HTML formatting * * @param text The string to parse * @param showEmoticons Whether to show MSN emoticons in the parsed string * @param showSmallEmoticons Whether the emoticons should be full-size or small * @param showLinks Whether to enable clickable links and email addresses * @param showFormatting Whether to show or strip away MSN+ formatting tags * @param allowEmoticonLinks If false, the parser will never add links for adding an emoticon (such as for the chat history dialog) * @param handle If not null or empty, custom emoticons of this contact will be parsed * @param pendingEmoticonTags If the handle is specified, this must be too: this is a list of pending * custom emoticons which the contact specified by <code>handle</code> has sent to us. 
*/
// NOTE(review): several string literals of this method reached review with their HTML entities
// decoded (e.g. a bare '>' paired with chop( 4 )). Since the text is HTML-escaped before any
// matching happens, they have been restored as entities; the exact entities should be confirmed
// against what KMessShared::htmlEscape() emits.
void RichTextParser::parseMsnString( QString &text, bool showEmoticons, bool showSmallEmoticons, bool showLinks,
                                     bool showFormatting, bool allowEmoticonLinks, const QString &handle,
                                     QStringList &pendingEmoticonTags )
{
  // Remove all HTML
  KMessShared::htmlEscape( text );

  bool allowAddingEmoticons = false;

  // Build a collection of all emoticon data
  const QRegExp &emoticonRegExp = emoticonManager_->getHtmlPattern();
  const QHash<QString,QString> &emoticonReplacements = emoticonManager_->getHtmlReplacements( showSmallEmoticons );

  QRegExp customRegExp;
  QRegExp pendingRegExp;
  QHash<QString,QString> customReplacements;
  QHash<QString,QString> customHashes;
  QStringList customEmoticonsBlacklist;

  // Get theme of custom emoticons
  // NOTE(review): taking the address of a reference to test it against null is not a reliable
  // check; left untouched because the default arguments live in the header - confirm there.
  if( &handle != 0 && ! handle.isEmpty() )
  {
    // Avoid problems if no list of pending emoticons has been given
    if( &pendingEmoticonTags == 0 )
    {
      kWarning() << "The given pending emoticons list is not valid!";
      pendingEmoticonTags = QStringList();
    }

    if( handle == CurrentAccount::instance()->getHandle() )
    {
      customRegExp       = emoticonManager_->getHtmlPattern( true );
      customReplacements = emoticonManager_->getHtmlReplacements( showSmallEmoticons, true );
      // We already have all of our emoticons, there are no pending ones
    }
    else
    {
      const ContactBase *contact = CurrentAccount::instance()->getContactByHandle( handle );
      if( contact != 0 )
      {
        customRegExp             = contact->getEmoticonPattern();
        customReplacements       = contact->getEmoticonReplacements();
        customHashes             = contact->getEmoticonHashes();
        pendingRegExp            = contact->getPendingEmoticonPattern();
        customEmoticonsBlacklist = contact->getEmoticonBlackList();
        allowAddingEmoticons     = allowEmoticonLinks;
      }
    }
  }

  // TODO: place these regexps at the beginning of this file and
  // initialize them *once*!
  // NOTE(review): the '<' alternative below may originally have been an escaped entity as well - confirm.
  QRegExp linkRegExp;
  linkRegExp.setPattern( "\\b((?:http://|https://|ftp://|sftp://|www\\.)"
                         "\\S+)"                 // match protocol string followed by the host/path
                         "[.,;!?]?(?:<|\\s|$)"   // ending with <, \s or $, not counting .,;?!"' before
                                                 // (there are some more modifications to a matched
                                                 // URL below)
                       );
  linkRegExp.setMinimal(1);

  QRegExp emailRegExp;
  emailRegExp.setPattern( "\\b("                    // begin of word, start capture
                          "[a-zA-Z0-9_\\-\\.+]+"    // match email username
                          "\\@"                     // match '@'
                          "[a-zA-Z0-9\\-\\.]+"      // match domain hostname
                          "\\.[a-zA-Z0-9]{2,6}"     // match top-level-domain
                          ")"                       // end capture
                          "(?:[^a-zA-Z0-9\\-]|$)"   // not followed by more simple characters, or should find an end-of-line
                        );

  QRegExp geekLinkRegExp;
  geekLinkRegExp.setPattern( "(?:^|\\b)"                     // look-before test, for start of capture or word delimiter
                             "("                             // begin of word, start capture
                             "([a-zA-Z0-9\\-]+\\.)+"         // match simple characters, but it should contain a dot between each part
                             "([a-zA-Z]{2,3})"               // finally match domain part 2 or 3 characters
                             "(/[a-zA-Z0-9\\-_/\\.?=&]+)?"   // match the path on the server and simple query requests
                             ")"                             // end capture
                             "(?:[^a-zA-Z0-9]|$)"            // not followed by more simple characters, or should find an end-of-line
                           );

  // these can also be initialized *once*!
  QRegExp longWordsRegExp( QString( "([\\w\\d-_\\.]{%1})([\\w\\d-_\\.]+)" ).arg( WORDWRAP_EVERY ) );
  QStringList invalidCcTldList, topLevelDomainList;
  // block typical files instead of listing the whole country code list
  invalidCcTldList   << "js" << "hh" << "cc" << "ui" << "fo" << "so" << "ko" << "qt" << "pp" << "cf"
                     << "am" << "in" << "gz" << "ps" << "ai" << "rv" << "rm" << "wm" << "xd";
  topLevelDomainList << "com" << "org" << "net" << "edu" << "gov";

  // removing the need to test these every time:
#ifdef KMESSTEST
  KMESS_ASSERT( emoticonRegExp.isValid()  );
  KMESS_ASSERT( emailRegExp.isValid()     );
  KMESS_ASSERT( linkRegExp.isValid()      );
  KMESS_ASSERT( geekLinkRegExp.isValid()  );
  KMESS_ASSERT( longWordsRegExp.isValid() );
#endif

  // Set the filename of the placeholder image for pending emoticons
  static QString pendingEmoticonPlaceholder( Qt::escape( KGlobal::dirs()->findResource( "appdata", "pics/empty.png" ) ) );

  // Set up the emoticon replacement list
  QHash<QString,QString> emoticonReplacementList;
  QHash<QString,QString>::const_iterator ei;

  // first write emoticonReplacements, then overwrite with customReplacements.
  // We can't do pendingEmoticons right now, because we don't have a QHash of them.
  // This is considered TODO.
  ei = emoticonReplacements.constBegin();
  while( ei != emoticonReplacements.constEnd())
  {
    KMESS_ASSERT( ! ei.value().isEmpty() );
    emoticonReplacementList.insert( ei.key(), "<span>" + ei.value() + "</span>" );
    ++ei;
  }

  ei = customReplacements.constBegin();
  while( ei != customReplacements.constEnd())
  {
    KMESS_ASSERT( ! ei.value().isEmpty() );

    if( customEmoticonsBlacklist.contains( ei.key() ) )
    {
#ifdef KMESSDEBUG_RICHTEXTPARSER
      kDebug() << "Ignoring blacklisted emoticon" << ei.key();
#endif
      ++ei;
      continue;
    }

    if( allowAddingEmoticons && ! emoticonManager_->emoticonIsAdded( customHashes.value( ei.key() ) ) )
    {
#ifdef KMESSDEBUG_RICHTEXTPARSER
      kDebug() << "Inserting emoticon additional link for " << ei.key() << " with hash " << customHashes.value( ei.key() );
#endif
      QString imagePath( ei.value() );
      QString urlCode( QUrl::toPercentEncoding( ei.key() ) );

      // Retrieve the image name from the replacement
      // TODO: Change the ' to \", and optimize/cache the result (somehow!)
      imagePath = imagePath.replace( "\"", "'");
      imagePath = imagePath.mid( imagePath.indexOf( "src='" ) + 5 );
      imagePath = imagePath.left( imagePath.indexOf( "'" ) );

      // i18n() will unescape the string: Without this, an emoticon like " 'test " will
      // result in an attribute like this: " title='Add this emoticon: 'test' ", messing
      // up the whole markup
      QString escapedCode( ei.key() );
      KMessShared::htmlEscape( escapedCode );

      emoticonReplacementList.insert( ei.key(),
                                      "<a name='newEmoticon_" + urlCode
                                      + "' title='" + i18n( "Add this emoticon: %1", escapedCode )
                                      + "' href='kmess://emoticon/" + handle + "/" + urlCode + "/"
                                      + QUrl::toPercentEncoding( imagePath ) + "'>"
                                      + ei.value()
                                      + "</a>" );
    }
    else
    {
#ifdef KMESSDEBUG_RICHTEXTPARSER
      kDebug() << "Not inserting emoticon additional link for " << ei.key()
               << ". allowAddingEmoticons=" << allowAddingEmoticons
               << "; customHashes.value( ei.key() ) = " << customHashes.value( ei.key() )
               << "; emoticonIsAdded=" << emoticonManager_->emoticonIsAdded( customHashes.value( ei.key() ) );
#endif
      emoticonReplacementList.insert( ei.key(), ei.value() );
    }

    ++ei;
  }

  // TODO: also do pending emoticons here

  // set up all regexps; the array index doubles as the case label of the switch below
  static const int REGEXP_COUNT = 7;
  const QRegExp* regexps[REGEXP_COUNT];
  regexps[0] = ( showLinks ? &linkRegExp  : 0 );
  regexps[1] = ( showLinks ? &emailRegExp : 0 );
  regexps[2] = ( ( showEmoticons && ! customRegExp  .isEmpty() ) ? &customRegExp   : 0 );
  regexps[3] = ( ( showEmoticons && ! pendingRegExp .isEmpty() ) ? &pendingRegExp  : 0 );
  regexps[4] = ( showLinks ? &geekLinkRegExp : 0 );
  regexps[5] = ( ( showEmoticons && ! emoticonRegExp.isEmpty() ) ? &emoticonRegExp : 0 );
  regexps[6] = ( &longWordsRegExp );

  // We apply the regexps in order, and each time, we take the matched part out of the string and re-add it
  // to the QStringList. Every piece of already-parsed data is HTML and starts with <, so we know what strings
  // not to parse.
  QString parseString, matched, replacement, tld, placeholderId, wordWrapLink;
  QStringList output( text );
#if QT_VERSION >= 0x040500
  const QRegExp *regexp;
#else
  QRegExp *regexp;
#endif
  int index;

  for( int i = 0; i < REGEXP_COUNT; ++i )
  {
#if QT_VERSION >= 0x040500
    regexp = regexps[i];
#else
    regexp = const_cast<QRegExp*>( regexps[i] ); // QRegExp->cap() is not const in Qt 4.4
#endif
    if( regexp == 0 )
    {
      continue;
    }

    for( int j = 0; j < output.size(); ++j )
    {
      parseString = output.at( j );

#ifdef KMESSDEBUG_RICHTEXTPARSER
      kDebug() << "applying regexp" << i << "to string" << parseString;
#endif

      // Don't parse this piece, it's HTML
      if( parseString.startsWith( "<" ) )
      {
        continue;
      }
      else if( parseString.isEmpty() )
      {
        continue;
      }

      index = regexp->indexIn( parseString );
      if( index == -1 )
      {
        continue;
      }

#ifdef KMESSDEBUG_RICHTEXTPARSER
      kDebug() << "Regexp" << i << "matches at character" << index << "in string" << j << ": " << parseString;
#endif

      // Match, turn this piece into HTML (split string up into 3 strings)
      switch( i )
      {
        // Normal link
        case 0:
          matched = regexp->cap( 1 );

          // Some link normalizing: only allow ')' at the end if there's '(', same with ", etc
          // The text has been HTML-escaped, so the special characters show up here as entities
          if( matched.endsWith( ")" ) && ! matched.contains( "(" ) )
          {
            matched.chop( 1 );
          }
          if( matched.endsWith( "&gt;" ) && ! matched.contains( "&lt;" ) )
          {
            matched.chop( 4 );
          }
          if( matched.endsWith( "}" ) && ! matched.contains( "{" ) )
          {
            matched.chop( 1 );
          }
          // NOTE(review): "&#34;" and "&#39;" are inferred from the 5-character chop() and the -6
          // search offset; confirm that KMessShared::htmlEscape() produces exactly these entities.
          if( matched.endsWith( "&#34;" ) && matched.lastIndexOf( "&#34;", -6 ) == -1 )
          {
            matched.chop( 5 );
          }
          if( matched.endsWith( "&#39;" ) && matched.lastIndexOf( "&#39;", -6 ) == -1 )
          {
            matched.chop( 5 );
          }

          // pre-wordwrap the link: make sure it word wraps nicely in KHTML :)
          wordWrapLink = QString();
          for( int k = 0; k < matched.length(); k += WORDWRAP_EVERY )
          {
            int nextSemi = matched.indexOf( ";", k + WORDWRAP_EVERY );
            if( nextSemi - (k + WORDWRAP_EVERY) >= 0 && nextSemi - (k + WORDWRAP_EVERY) <= 5 )
            {
              // There's a ; just after this text piece, so this piece may end just inside a HTML entity.
              // Therefore, we can't simply add <wbr/> here, we need to insert it just before
              // the &.
              int htmlEntityStarts = matched.lastIndexOf( "&", nextSemi );
              int htmlEntityLength = nextSemi - htmlEntityStarts;

              // all text before the entity, then a wbr, then the entity
              wordWrapLink.append( matched.mid( k, htmlEntityStarts - k )
                                   + "<wbr/>"
                                   + matched.mid( htmlEntityStarts, htmlEntityLength ) );

              // then increase the current position just a little so the next run starts after the entity
              k += nextSemi - ( k + WORDWRAP_EVERY );
            }
            else
            {
              // otherwise, just append.
              wordWrapLink.append( matched.mid( k, WORDWRAP_EVERY ) + "<wbr/>" );
            }
          }
          wordWrapLink.chop( 6 ); // chop off the last <wbr/>

          if( matched.startsWith( "www." ) )
          {
            replacement = "<a href=\"http://" + matched + "\" target=\"_blank\">" + wordWrapLink + "</a>";
          }
          else
          {
            replacement = "<a href=\"" + matched + "\" target=\"_blank\">" + wordWrapLink + "</a>";
          }
          break;

        // Email link
        case 1:
          matched     = regexp->cap( 1 );
          replacement = "<a href=\"mailto:" + matched + "\">" + matched + "</a>";
          break;

        // Geek-style link
        case 4:
          matched = regexp->cap( 1 );
          tld     = geekLinkRegExp.cap( 3 );

          if( ( tld.length() == 2 && ! invalidCcTldList.contains( tld.toLower() ) )
          ||  ( tld.length() == 3 && topLevelDomainList.contains( tld.toLower() ) ) )
          {
            replacement = "<a href=\"http://" + matched + "/\" target=\"_blank\">" + matched + "</a>";
          }
          else
          {
            // Not a valid geeklink, don't replace it
            replacement = matched;
          }
          break;

        // Pending emoticon
        case 3:
          // For now, we will have to do pending emoticons seperately from custom
          // and normal ones. TODO: fix this.

          // Fetch the current match first: the blacklist test below must look at this emoticon,
          // not at whatever the previous iteration left in the variable
          matched = regexp->cap( 0 );

          // Don't replace anything if this emoticon is blacklisted
          if( customEmoticonsBlacklist.contains( matched ) )
          {
            replacement = matched;
            break;
          }

          placeholderId = "ce" + QString::number( ++lastPendingEmoticonId_ );
          pendingEmoticonTags.append( placeholderId );

          // Insert placeholder
          replacement = "<img id='" + placeholderId
                      + "' src='" + pendingEmoticonPlaceholder // This is already escaped
                      + "' alt='" + matched
                      + "' contact='" + Qt::escape( handle )
                      + "' width='16' height='16' valign='middle' "
                        "class='customEmoticonPlaceholder' />";
          break;

        // Custom emoticon: parsed like standard emoticons, to allow
        // overwriting a standard emoticon's shortcut with a custom one
        case 2:
        // Standard emoticon
        case 5:
          matched = regexp->cap( 0 );

          // Note that an emoticon shortcut containing a special character (such as a custom
          // emoticon starting with an ampersand) appears in the regexp in its HTML-escaped form,
          // just like it does in the escaped text. Per the original note, this does not collide
          // with the ";)" emoticon, whose semicolon is escaped as well. So no worries, no hacks,
          // no fixes needed.
          if( ! emoticonReplacementList.contains( matched ) )
          {
            kWarning() << "Emoticon replacement list does not contain matched emoticon" << matched;
            replacement = matched;
            break;
          }

          replacement = emoticonReplacementList.value( matched );
          break;

        // Long words
        case 6:
          matched     = regexp->cap( 1 );
          replacement = matched + "<wbr/>";
          break;

        default:
          kWarning() << "Warning: Unhandled regexp";
          replacement.clear();
          break;
      }

      if( matched.isEmpty() )
      {
        kError() << "Zero-length regexp match in regexp" << i << "- string:" << parseString;
        kError() << "Regexp at this point: " << regexp->pattern();
#ifdef KMESSTEST
        KMESS_ASSERT( regexp->pattern() == regexps[i]->pattern() );
#endif
        // try to fix it by skipping this frame and leaving the rest the same...
        j++;
        continue;
      }

#ifdef KMESSDEBUG_RICHTEXTPARSER
      kDebug() << j     << ":" << parseString.left( index );
      kDebug() << j + 1 << ":" << replacement;
      kDebug() << j + 2 << ":" << parseString.mid( index + matched.length() );
#endif

      // Split the piece in three: text before the match, replacement, text after the match
      output.replace( j,     parseString.left( index ) );
      output.insert ( j + 1, replacement );
      output.insert ( j + 2, parseString.mid( index + matched.length() ) );

      // Increment j here: it'll be also incremented by the loop.
      // This is because j+1 now is an html string and needs not to be parsed
      j++;

#ifdef KMESSDEBUG_RICHTEXTPARSER
      kDebug() << "j is now:" << j;
#endif
    }
  }

  // NOTE(review): the two replacement targets below were restored as "&nbsp;" entities following
  // the accompanying comments; confirm the exact literals against the upstream sources.
  text = output.join( "" )
         // Replace any "> "s in the message with ">&nbsp;" to avoid missing spaces after emoticons
         .replace( "> ", ">&nbsp;" )
         // Replace double spaces with double &nbsp;s so that they'll show properly
         .replace( "  ", "&nbsp;&nbsp;" );

  // TODO: We don't want to remove MSN+ tags when showFormatting is disabled!!
// Replace the MSN Plus text formatting tags if( showFormatting ) { parseMsnPlusString( text ); getFormattedString( text ); } else { getCleanString( text ); } } // Constructor, can be told to not parse the initial string but just save it (during KMess initialization) FormattedString::FormattedString( const QString &string, bool parseName ) : showEmoticons_ ( true ) , showLinks_ ( false ) , showSmallEmoticons_( true ) { if( ! string.isEmpty() ) { if( parseName ) { setString( string ); } else { original_ = formatted_ = escaped_ = cleaned_ = string; } } } // Returns the specified version of the string const QString &FormattedString::getString( FormattingMode mode ) const { switch( mode ) { case STRING_CLEANED: return cleaned_; case STRING_CLEANED_ESCAPED: return escaped_; case STRING_FORMATTED: return formatted_; case STRING_CHAT_SETTING: return CurrentAccount::instance()->getUseChatFormatting() ? formatted_ : cleaned_; case STRING_LIST_SETTING: return CurrentAccount::instance()->getUseListFormatting() ? formatted_ : cleaned_; case STRING_CHAT_SETTING_ESCAPED: return CurrentAccount::instance()->getUseChatFormatting() ? formatted_ : escaped_; case STRING_LIST_SETTING_ESCAPED: return CurrentAccount::instance()->getUseListFormatting() ? formatted_ : escaped_; default: return original_; } } // Set a string, which will be formatted according to the given flags void FormattedString::setString( const QString &string ) { // Save the new string everywhere cleaned_ = escaped_ = original_ = formatted_ = string; // Then parse the cleaned and formatted versions if( ! string.isEmpty() ) { RichTextParser::getCleanString( cleaned_ ); RichTextParser::parseMsnString( formatted_, showEmoticons_, showSmallEmoticons_, showLinks_, true ); escaped_ = cleaned_; KMessShared::htmlEscape( escaped_ ); } }