Logo Search packages:      
Sourcecode: kmess version File versions  Download package

void OldRichTextParser::parseMsnString ( QString &  text,
bool  showEmoticons = true,
bool  showSmallEmoticons = true,
bool  showLinks = true,
bool  showFormatting = false,
const QString &  handle = *((QString*)0),
QStringList &  pendingEmoticonTags = *((QStringList*)0) 
) [static]

Transform a string into its rich text form

This is the one-stop shop for text parsing. This method is capable of single-pass conversion of many things into their rich text equivalents:

  • web links are made clickable, even geek-style "kmess.org" links and email addresses
  • emoticon shortcuts are transformed into HTML image tags, even custom ones and ones not yet received
  • MSN Plus formatting is turned into HTML formatting

Parameters:
text The string to parse
showEmoticons Whether to show MSN emoticons in the parsed string
showSmallEmoticons Whether the emoticons should be full-size or small
showLinks Whether to enable clickable links and email addresses
showFormatting Whether to show or strip away MSN+ formatting tags
handle If not null or empty, custom emoticons of this contact will be parsed
pendingEmoticonTags If the handle is specified, this must be too: this is a list of pending custom emoticons which the contact specified by handle has sent to us.

Definition at line 494 of file old_richtextparser.cpp.

References CurrentAccount::getContactByHandle(), ContactBase::getEmoticonBlackList(), ContactBase::getEmoticonPattern(), ContactBase::getEmoticonReplacements(), EmoticonManager::getHtmlPattern(), EmoticonManager::getHtmlReplacements(), ContactBase::getPendingEmoticonPattern(), KMessShared::htmlEscape(), and parseMsnPlusString().

{
  // Overview of this method:
  //  1. The text is escaped with KMessShared::htmlEscape().
  //  2. A single left-to-right pass searches all enabled regular expressions from the current
  //     position; the earliest match wins (on a tie, the lowest index in regexps[] wins) and is
  //     replaced by its rich text equivalent. Parsing then resumes after the replacement.
  //  3. Spaces which would otherwise collapse are turned into &nbsp; entities.
  //  4. MSN Plus formatting is either converted (showFormatting) or stripped.
  KMessShared::htmlEscape( text );

  // Links and emoticons are replaced in one loop cycle, traversing the message text
  // Multiple search-replace cycles give unwanted side effects:
  // - smileys can pop up in links like ftp://user:pass@host/ and https://host
  // - emoticon replacements could be replaced by another cycle

  bool                         allowAddingEmoticons = false;

  // Build a collection of all emoticon data
  const QRegExp               &emoticonRegExp       = emoticonManager_->getHtmlPattern();
  const QHash<QString,QString> &emoticonReplacements = emoticonManager_->getHtmlReplacements( showSmallEmoticons );


  QRegExp customRegExp;
  QRegExp pendingRegExp;
  QHash<QString,QString> customReplacements;
  QRegExp userCustomEmoticons;
  QStringList customEmoticonsBlacklist;

  // The signature defaults pendingEmoticonTags to a null reference. Never write through it:
  // placeholder IDs are collected via this pointer, which is redirected to a local throw-away
  // list when the caller did not provide a list.
  QStringList  discardedPendingTags;
  QStringList *pendingTags = &pendingEmoticonTags;

  // Get theme of custom emoticons
  // NOTE(review): comparing the address of a reference with 0 only works because the default
  // arguments bind null references; this is formally undefined behavior and a compiler may
  // optimize the test away. Changing that requires changing the signature.
  if( &handle != 0 && ! handle.isEmpty() )
  {
    // Avoid problems if no list of pending emoticons has been given
    if( pendingTags == 0 )
    {
      kWarning() << "The given pending emoticons list is not valid!";
      pendingTags = &discardedPendingTags;
    }

    if( handle == CurrentAccount::instance()->getHandle() )
    {
        customRegExp       = emoticonManager_->getHtmlPattern( true );
        customReplacements = emoticonManager_->getHtmlReplacements( showSmallEmoticons, true );
        // We already have all of our emoticons, there are no pending ones
    }
    else
    {
      const ContactBase *contact = CurrentAccount::instance()->getContactByHandle( handle );
      if( contact != 0 )
      {
        customRegExp       = contact->getEmoticonPattern();
        customReplacements = contact->getEmoticonReplacements();
        pendingRegExp      = contact->getPendingEmoticonPattern();
        customEmoticonsBlacklist = contact->getEmoticonBlackList();

        allowAddingEmoticons = true;
        userCustomEmoticons = emoticonManager_->getHtmlPattern( true ); // Used to verify if we've already added an emoticon
      }
    }
  }


  QRegExp linkRegExp;
  linkRegExp.setPattern( "\\b(?:http://|https://|ftp://|sftp://|www\\.)"  // match protocol string
                         "[^ \r\n]+"                                      // followed by the host/path
                       );

  QRegExp emailRegExp;
  emailRegExp.setPattern(
                          "\\b("                   // begin of word, start capture
                          "[a-z0-9_\\-\\.]+"       // match email username
                          "\\@"                    // match '@'
                          "[a-z0-9\\-\\.]+"        // match domain hostname
                          "\\.[a-z0-9]{2,6}"       // match top-level-domain
                          ")"                      // end capture
                          "(?:[^a-zA-Z0-9\\-]|$)"  // not followed by more simple characters, or should find an end-of-line
                        );

  QRegExp geekLinkRegExp;
  geekLinkRegExp.setPattern(
                             "(^|\\b)"                  // look-before test, for start of capture or word delimiter
                             "("                        // begin of word, start capture
                             "([a-z0-9\\-]+\\.)+"       // match simple characters, but it should contain a dot between each part
                             "([a-z]{2,3})"             // finally match domain part 2 or 3 characters
                             "(/[a-z0-9\\-_/\\.?=&]+)?" // match the path on the server and simple query requests
                             ")"                        // end capture
                             "(?:[^a-zA-Z0-9]|$)"       // not followed by more simple characters, or should find an end-of-line
                           );

  QRegExp longWordsRegExp( "([\\w\\d-_\\.]{30})([\\w\\d-_\\.]+)" );
  QRegExp punctuationChars( "(?:[.,;!?\"'])$" );
  QRegExp invalidCcTld( "^(js|hh|cc|ui|fo|so|ko|qt|pp|cf|am|in|gz|ps|ai|rv|rm|wm)$" ); // block typical files instead of listing the whole country code list
  QRegExp topLevelDomain( "^(?:com|org|net|edu|gov)$" );

#ifdef KMESSTEST
  KMESS_ASSERT( emoticonRegExp.isValid() );
  KMESS_ASSERT( emailRegExp.isValid() );
  KMESS_ASSERT( linkRegExp.isValid() );
  KMESS_ASSERT( geekLinkRegExp.isValid() );
  KMESS_ASSERT( longWordsRegExp.isValid() );
#endif

  // Set the filename of the placeholder image for pending emoticons
  static QString pendingEmoticonPlaceholder( KGlobal::dirs()->findResource( "appdata", "pics/empty.png" ) );

  // The index in this table is also the case label used in the switch below.
  // A null entry means that kind of match is disabled for this call.
  static const int REGEXP_COUNT = 7;
  const QRegExp* regexps[REGEXP_COUNT];

  // Allow to overwrite standard emoticons by parsing custom emoticons before standard ones
  regexps[0] = ( ( showEmoticons && ! customRegExp  .isEmpty() ) ? &customRegExp   : 0 );
  regexps[1] = ( ( showEmoticons && ! pendingRegExp .isEmpty() ) ? &pendingRegExp  : 0 );
  regexps[2] = ( ( showEmoticons && ! emoticonRegExp.isEmpty() ) ? &emoticonRegExp : 0 );
  regexps[3] = ( showLinks ? &linkRegExp : 0 );
  regexps[4] = ( showLinks ? &emailRegExp : 0 );
  regexps[5] = ( showLinks ? &geekLinkRegExp : 0 );
  regexps[6] = ( &longWordsRegExp );

  int lastPos = 0;
  int matches[ REGEXP_COUNT ];
  memset( matches, -1, sizeof( matches ) );

  while( true )
  {
    // Find out which expression matches first
    int matchedRegExp = -1;
    int matchStart    = -1;
    int matchedLength = 0;
    for(int i = 0; i < REGEXP_COUNT; i++)
    {
      if( regexps[ i ] == 0 )
      {
        continue;
      }

      matches[ i ] = regexps[ i ]->indexIn( text, lastPos );
      if( matches[ i ] == -1 || (int) text.length() < matches[ i ] )
      {
        continue;
      }

#ifdef KMESSDEBUG_RICHTEXTPARSER
      kDebug() << "regexp " << i << " matches at character " << matches[ i ];
#endif

      // See if it's before all other regexps
      // The strict comparison keeps the lowest index when two expressions match at the same position
      if( matches[ i ] < matchStart || matchStart == -1 )
      {
        matchStart    = matches[ i ];
        matchedRegExp = i;
        matchedLength = regexps[ i ]->matchedLength();
      }
    }


    // Declared per iteration, so that a replacement which was computed but not applied
    // can never leak into a later iteration.
    QString replacement;
    QString link;
    QString linkBefore;
    QString code;
    QString altText;
    QString placeholderId;

    // Determine the replacement for the matched expression
    switch( matchedRegExp )
    {

      // Found a custom emoticon
      case 0:
        code = text.mid( matchStart, customRegExp.matchedLength() );  // cap(0) is not const

        // Avoid replacing invalid emoticons with nothing
        if( ! customReplacements.contains( code ) )
        {
#ifdef KMESSDEBUG_RICHTEXTPARSER
          kWarning() << "Emoticon replacement for '" << code << "' not found!";
#endif
          replacement = code;
          break;
        }

        // Do not display unwanted emoticons
        if( customEmoticonsBlacklist.contains( code ) )
        {
#ifdef KMESSDEBUG_RICHTEXTPARSER
          kDebug() << "Ignoring blacklisted emoticon" << code;
#endif
          replacement = code;
          break;
        }

        replacement = customReplacements[ code ];

        // This emoticon is unknown, allow the user to add it by adding an internal KMess link to it
        if( allowAddingEmoticons && userCustomEmoticons.indexIn( code ) == -1 )
        {
#ifdef KMESSDEBUG_RICHTEXTPARSER
          kDebug() << "Inserting emoticon addition link:" << code;
#endif
          QString imagePath( replacement );
          QString urlCode( QUrl::toPercentEncoding( code ) );

          // Retrieve the image name from the replacement
          imagePath = imagePath.replace( "\"", "'" );
          imagePath = imagePath.mid( imagePath.indexOf( "src='" ) + 5 );
          imagePath = imagePath.mid( 0, imagePath.indexOf( "'" ) );

          // i18n() will unescape the string: Without this, an emoticon like " 'test " will result in
          // an attribute like this: " title='Add this emoticon: 'test' ", messing up the whole markup
          QString escapedCode( code );
          KMessShared::htmlEscape( escapedCode );

          // The name attribute is required as, if the user adds the emoticon, we'll want to make all links like this unclickable
          replacement = "<a name='newEmoticon_" + urlCode
                        + "' title='" + i18n( "Add this emoticon: %1", escapedCode )
                        + "' href='kmess://emoticon/" + handle + "/" +  urlCode + "/" + QUrl::toPercentEncoding( imagePath )
                        + "'>"
                        + replacement
                        + "</a>";
        }
#ifdef KMESSDEBUG_RICHTEXTPARSER
        else
        {
          kDebug() << "Allow adding emoticons:" << allowAddingEmoticons << "code:" << code;
        }
#endif
        break;


      // Found a custom emoticon, but the image file is still being downloaded
      // Generate a placeholder tag, <img src="empty.png">, and update this tag later when the emoticon is received
      case 1:
        // Generate and store placeholder ID
        // pendingRegExp is only set when a handle was given, so pendingTags is never null here
        placeholderId = "ce" + QString::number( ++lastPendingEmoticonId_ );
        pendingTags->append( placeholderId );

        // Insert placeholder
        code = text.mid( matchStart, pendingRegExp.matchedLength() );  // cap(0) is not const
        replacement = "<img id='" + placeholderId
                    + "' src='" + Qt::escape( pendingEmoticonPlaceholder )

                    + "' alt='" + code
                    + "' contact='" + Qt::escape( handle )
                    + "' width='16' height='16' valign='middle' class='customEmoticonPlaceholder' />";
        break;


      // Found an emoticon
      case 2:
        code = text.mid( matchStart, emoticonRegExp.matchedLength() );  // cap(0) is not const
        if( emoticonReplacements.contains( code ) )
        {
          replacement = emoticonReplacements[ code ];
        }
        else
        {
          // HACK: Replace with the same string, to skip the entire code and continue parsing after it
          // See EmoticonTheme::updateCache()
          replacement = code;
#ifdef KMESSDEBUG_RICHTEXTPARSER
          kDebug() << "Skipping unmatched code '" << code << "'";
#endif
        }

        break;


      // Found a link
      case 3:
        // When www. is found, automatically add http:// to the href
        // This doesn't clash, because http:// links are matched earlier
        link = linkRegExp.cap(0);
        if( ! link.isEmpty() )
        {
          // filter out punctuation char
          // NOTE(review): QRegExp::exactMatch() requires the whole string to match, so this
          // one-character pattern presumably never matches a complete link and only the ")" test
          // below takes effect. indexIn() may have been intended - confirm before changing it,
          // since the text is already HTML-escaped and entities end with ';'.
          matchedLength = link.length();
          if(  punctuationChars.exactMatch(link)                // matches standard chars at end
          || ( link.endsWith(")") && ! link.contains("(") ) )   // has ")" at end, unless it's a wikipedia disambiguation link
          {
            matchedLength--;
            link.remove( matchedLength, 1 );
          }

          // Filter out long words in links (one example: eBay auctions)
          QString longWordFilter( link );
          longWordFilter.replace( longWordsRegExp, "\\1&shy;\\2" );

          // Create link; only the href gets the protocol prefix, the visible text stays as typed
          QString href( link );
          if( link.startsWith("www.") )
          {
            href.prepend( "http://" );
          }
          replacement = "<a href=\"" + href + "\" target=\"_blank\">" + longWordFilter + "</a>";
        }
        break;


      // Found a email address
      case 4:
        link = emailRegExp.cap(1);
        if( ! link.isEmpty() )
        {
          matchedLength = link.length();  // filter out punctuation char
          replacement   = "<a href=\"mailto:" + link + "\">" + link + "</a>";
        }
        break;


      // Found a geek-style link
      case 5:
        linkBefore = geekLinkRegExp.cap(1); // matched look-before character
        link       = geekLinkRegExp.cap(2);
        if( ! link.isEmpty() )
        {
          // Avoid matching "index.htm", "test.js" etc.
          // The list can never be complete but filter out 99% of the cases
          QString tld( geekLinkRegExp.cap(4) );
          if( ( tld.length() == 2 && ! invalidCcTld.exactMatch(tld) )
          ||  ( tld.length() == 3 && topLevelDomain.exactMatch(tld) ) )
          {
            matchedLength = linkBefore.length() + link.length();  // filter out punctuation char
            replacement = linkBefore + "<a href=\"http://" + link + "/\" target=\"_blank\">" + link + "</a>";
          }
        }
        break;


      // Found an excessively long word (for example, aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaah!)
      case 6:
        matchedLength = longWordsRegExp.cap( 0 ).length();
        replacement = longWordsRegExp.cap( 1 ) + "&shy;" + longWordsRegExp.cap( 2 );
        break;

      // Nothing was found, stop
      case -1:
        break;

      default:
#ifdef KMESSDEBUG_RICHTEXTPARSER
        kWarning() << "result of regular expression " << matchedRegExp << " is unhandled!";
#endif
        break;
    }

    // Nothing was found, stop
    if( matchedRegExp == -1 )
    {
      // C doesn't allow us to break the while loop inside the switch statement, so break again
      break;
    }

    // Process the replacement
    if( replacement.isEmpty() || text.mid(matchStart, matchedLength).isEmpty() )
    {
      // No replacement found, move cursor to next char
      lastPos = matchStart + 1;
    }
    else
    {
      // Replace the original text
#ifdef KMESSDEBUG_RICHTEXTPARSER
      kDebug() << "replacing '" << text.mid( matchStart, matchedLength ) << "' with: " << replacement << " (matched regexp=" << matchedRegExp << ")";
#endif
      text.replace( matchStart, matchedLength, replacement );

      // Continue after the inserted markup, so that it is never parsed again
      lastPos     = matchStart + replacement.length();
    }
  }

  // Replace any "> "s in the message with ">&nbsp;" to avoid missing spaces after emoticons
  text = text.replace( "> ", ">&nbsp;" );

  // Replace double spaces with double &nbsp;s so that they'll show properly
  text = text.replace( "  ", "&nbsp;&nbsp;" );

  // Replace the MSN Plus text formatting tags
  if( showFormatting )
  {
    parseMsnPlusString( text );
    getFormattedString( text );
  }
  else
  {
    getCleanString( text );
  }
}


Generated by  Doxygen 1.6.0   Back to index