/***************************************************************************
                          mimemessage.cpp  -  description
                             -------------------
    begin                : Sat Mar 8 2003
    copyright            : (C) 2003 by Mike K. Bennett
    email                : mkb137b@hotmail.com
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#include "mimemessage.h"

#include "../kmessdebug.h"

#include <cctype>

#include <QByteArray>
#include <QRegExp>
#include <QTextCodec>

#include <KCharsets>
#include <KCodecs>
#include <KGlobal>

/*
  The decode...() functions and getCodecByName() come from KMail and are
  therefore (C) KMail developers.
*/



// The constructor
MimeMessage::MimeMessage()
{
}



// The constructor that parses a message
MimeMessage::MimeMessage(const QString &message)
{
#ifdef KMESSDEBUG_MIMEMESSAGE
  kmDebug() << "Parsing the sub message";
#endif

  parseMessage( message );
}



// The constructor that parses a message
//
// The raw bytes are inspected first: p2p messages and multi-packet messages
// keep their body as binary data (see getBinaryBody()), everything else is
// decoded from UTF-8 and parsed as plain text.
MimeMessage::MimeMessage(const QByteArray &message)
{
  // We get the header without the binary data that comes after.
  // The header ends at the blank line, or at the first NUL byte if that comes
  // earlier; when neither is present the whole message is inspected.
  const int nullPos = message.indexOf( '\0' );
  const int endMime = message.indexOf( "\r\n\r\n" );

  int headerLength = message.size();
  if( endMime != -1 )
  {
    headerLength = endMime;
  }
  if( nullPos != -1 && nullPos < headerLength )
  {
    headerLength = nullPos;
  }

  const QString messageHeader( QString::fromUtf8( message.data(), headerLength ) );

  // Regexps to parse the header
  QRegExp rx1("Content-Type: ([A-Za-z0-9$!*/\\-]*)");
  QRegExp rx2("Message-ID: ([^\r\n]+)");

  // Get the Content-Type header
  QString contentType;
  if( rx1.indexIn( messageHeader ) != -1 )
  {
    contentType = rx1.cap(1);
  }

  // Some messages should be preserved as binary, so take care of that.
  // - p2p messages have a 48 byte header.
  // - multi-packet messages can be split between a double byte utf-8 char.
  const bool isP2P         = ( contentType == "application/x-msnmsgrp2p" );
  const bool isMultiPacket = ( rx2.indexIn( messageHeader ) != -1 );

  // Without a header/body separator there is no body to extract, so such a
  // message is handled by the plain-text branch below.
  if( ( isP2P || isMultiPacket ) && endMime != -1 )
  {
#ifdef KMESSDEBUG_MIMEMESSAGE
    if( isP2P )
    {
      kmDebug() << "Parsing the message, extracting binary p2p body.";
    }
    else if( isMultiPacket )
    {
      kmDebug() << "Parsing the message, preserving multi-packet body as binary.";
    }
#endif

    const int bodyStart = endMime + 4; // 2 newlines
    const int bodySize  = message.size() - bodyStart;

    // Extract the Mime fields from the message
    const QString mimeData( QString::fromUtf8( message.data(), bodyStart ) );
    parseMessage( mimeData );

    // Extract the binary data from the message.
    // Copy to our own managed copy.
    binaryBody_ = QByteArray( message.data() + bodyStart, bodySize );
  }
  else
  {
#ifdef KMESSDEBUG_MIMEMESSAGE
    kmDebug() << "Parsing the message.";
#endif

    // This is a normal plain-text message.
    // The fromUtf8 call is required to convert Unicode characters properly! (like Chinese)
    parseMessage( QString::fromUtf8( message.data(), message.size() ) );
  }
}



// The copy constructor
MimeMessage::MimeMessage(const MimeMessage& other)
{
#ifdef KMESSDEBUG_MIMEMESSAGE
  kmDebug() << "copy constructor";
#endif

  // Take over the fields, values and both bodies of the other message.
  // The members are plainly assigned; the original author notes that the Qt
  // containers share their data until one side writes to it, and that the
  // binary body is never modified in place by this class.
  fields_     = other.fields_;
  values_     = other.values_;
  body_       = other.body_;
  binaryBody_ = other.binaryBody_;

#ifdef KMESSTEST
  // Make sure the ascii-zero characters are copied correctly:
  KMESS_ASSERT( binaryBody_.size() == other.binaryBody_.size() );
  KMESS_ASSERT( fields_.size()     == other.fields_.size()     );
#endif
}



// The destructor
MimeMessage::~MimeMessage()
{
}



// Add a field to the message
// The field is always appended, even if a field with that name already exists.
void MimeMessage::addField(const QString& field, const QString& value)
{
  fields_ << field;
  values_ << value;
}



// Change a field, or add it
void MimeMessage::setField(const QString& field, const QString& value)
{
  const int index = fields_.indexOf( field );

  if( index == -1 )
  {
    addField( field, value );
  }
  else
  {
    values_[ index ] = value;
  }
}



// Clear all fields and both bodies, so the object can be re-used
void MimeMessage::clearFields()
{
  body_ = "";
  fields_.clear();
  values_.clear();
  binaryBody_ = "";
}



// decodes MIME strings like =?iso...=...?= ...
//
// Input without any "=?" marker is decoded as UTF-8 and only unfolded.
// Otherwise the bytes are scanned for encoded-words of the form
// "=?charset?Q?text?=" or "=?charset?B?text?=": the text is decoded
// (quoted-printable or base64) and converted with the named codec, falling
// back to UTF-8 when getCodecByName() finds none. Anything that does not parse
// as an encoded-word is copied through unchanged.
QString MimeMessage::decodeRFC2047String(const QByteArray &aStr) const
{
  if( ! aStr.contains( "=?" ) )
  {
    //QString str( messageCodec->toUnicode(aStr) );
    const QString plain( QString::fromUtf8( aStr ) );
    if( plain.indexOf( '\n' ) == -1 )
    {
      return plain;
    }

    // Unfold: each newline becomes one space, and the character that follows
    // the newline is dropped.
    QString unfolded;
    unfolded.reserve( plain.length() );

    int i = 0;
    while( i < plain.length() )
    {
      if( plain[i] == '\n' )
      {
        unfolded += ' ';
        i += 2;
      }
      else
      {
        unfolded += plain[i];
        i++;
      }
    }

    return unfolded;
  }

  // Scan a private copy: data() on it yields a NUL-terminated buffer, and the
  // caller's (const) array is never touched.
  QByteArray input( aStr );

  QString    result;
  QByteArray charset;
  QByteArray LWSP_buffer;
  bool       lastWasEncodedWord = false;
  const int  maxLen             = 200;  // upper bound on the length of one encoded-word

  for( const char *pos = input.data(); *pos; pos++ )
  {
    // line unfolding
    if( pos[0] == '\r' && pos[1] == '\n' )
    {
      pos++;
      continue;
    }
    if( pos[0] == '\n' )
    {
      continue;
    }

    // collect LWSP after encoded-words,
    // because we might need to throw it out
    // (when the next word is an encoded-word)
    if( lastWasEncodedWord && ( pos[0] == ' ' || pos[0] == '\t' ) )
    {
      LWSP_buffer += pos[0];
      continue;
    }

    // verbatimly copy normal text
    if( pos[0] != '=' || pos[1] != '?' )
    {
      result += LWSP_buffer + pos[0];
      LWSP_buffer.clear();
      lastWasEncodedWord = false;
      continue;
    }

    // found possible encoded-word
    const char *beg      = pos + 2;
    const char *mid      = 0;
    const char *end      = beg;
    char        encoding = 'Q';
    bool        valid    = true;

    // parse charset name
    // The casts to unsigned char keep the <cctype> calls defined for bytes >= 0x80.
    charset = "";
    int i = 2;
    pos += 2;
    while( i < maxLen
    &&     *pos != '?'
    &&   ( *pos == ' '
        || std::ispunct( static_cast<unsigned char>( *pos ) )
        || std::isalnum( static_cast<unsigned char>( *pos ) ) ) )
    {
      charset += *pos;
      pos++;
      ++i;
    }

    if( *pos != '?' || i < 4 || i >= maxLen )
    {
      valid = false;
    }
    else
    {
      // get encoding and check delimiting question marks.
      // The encoding is tested first, so pos[2] is never read when pos[1] is
      // already the terminating NUL.
      encoding = static_cast<char>( std::toupper( static_cast<unsigned char>( pos[1] ) ) );
      if( ( encoding != 'Q' && encoding != 'B' ) || pos[2] != '?' )
      {
        valid = false;
      }
      else
      {
        pos += 3;
        i   += 3;
      }
    }

    if( valid )
    {
      mid = pos;

      // search for end of encoded part
      while( i < maxLen && *pos && !( pos[0] == '?' && pos[1] == '=' ) )
      {
        ++i;
        ++pos;
      }

      if( i >= maxLen || ! *pos )
      {
        valid = false;
      }
      else
      {
        end = pos + 2; // end now points to the first char after the encoded string
      }
    }

    if( valid )
    {
      // valid encoding: decode and throw away separating LWSP.
      // The encoded text is exactly the bytes in [mid, pos); the original
      // passed a negative length (mid - pos - 1) to QByteArray::left() here.
      QByteArray encodedText( mid, static_cast<int>( pos - mid ) );
      QByteArray decodedText;

      if( encoding == 'Q' )
      {
        // decode quoted printable text; underscores stand for spaces
        encodedText.replace( '_', ' ' );
        decodedText = KCodecs::quotedPrintableDecode( encodedText );
      }
      else
      {
        // decode base64 text
        decodedText = QByteArray::fromBase64( encodedText );
      }

      QTextCodec *codec = getCodecByName( charset );
      if( ! codec )
      {
        result += QString::fromUtf8( decodedText );
      }
      else
      {
        result += codec->toUnicode( decodedText );
      }

      lastWasEncodedWord = true;
      pos = end - 1; // the loop increment moves on to 'end'
    }
    else
    {
      // invalid encoding, keep separating LWSP.
      // Copy the "=?" literally; scanning resumes right behind it.
      pos = beg - 2;
      result += LWSP_buffer;
      result += *pos++;
      result += *pos;
      lastWasEncodedWord = false;
    }

    LWSP_buffer.clear();
  }

  return result;
}



// Return the body of the message
const QString& MimeMessage::getBody() const
{
  return body_;
}



// Return the P2P data of the message
const QByteArray& MimeMessage::getBinaryBody() const
{
  return binaryBody_;
}



// Finds a QTextCodec by name
// Returns 0 for an empty name; otherwise the lower-cased name is looked up
// through KGlobal::charsets().
QTextCodec* MimeMessage::getCodecByName(const QByteArray& codecName )
{
  if( codecName.isEmpty() )
  {
    return 0;
  }

  return KGlobal::charsets()->codecForName( codecName.toLower() );
}



// Return the field and value at the given index
// Both outputs are set to a null string when the index is out of range
// (negative, or not below the number of fields).
void MimeMessage::getFieldAndValue(QString& field, QString& value, const int index) const
{
  if( index >= 0 && index < fields_.count() )
  {
    field = fields_[index];
    value = values_[index];
  }
  else
  {
    field = QString::null;
    value = QString::null;
  }
}



// Return the message fields as a big string
// Every field is written as "name: value" followed by "\r\n".
QString MimeMessage::getFields() const
{
  QString message( "" );

  // Get the fields and values
  for( int i = 0; i < fields_.count(); i++ )
  {
    message += fields_[i] + ": " + values_[i] + "\r\n";
  }

  return message;
}



// Return the entire message as a big string
QByteArray MimeMessage::getMessage() const { // So much easier with Qt4! QString message = QString( getFields() + "\r\n" + body_); // for messages with an empty body must use two \r\n // sequences. This fixes typing notifications not showing in WLM 2009. if( body_.isEmpty() && binaryBody_.isEmpty() ) { message += "\r\n"; } QByteArray textPart = message.toUtf8(); return textPart + binaryBody_; } // The total number of fields uint MimeMessage::getNoFields() const { return fields_.count(); } // Get one parameter of a value that has multiple parameters QString MimeMessage::getSubValue(const QString& field, const QString& subField) const { QString value, parameter; int left, right; // Get the value referred to by the field value = getValue( field ); if ( !value.isNull() ) { // If the subfield isn't specified, then get whatever is at the start of the message until the // first semicolon or the end of the line if ( subField.isNull() ) { left = 0; } else { // The left of the parameter is "subField=", the right is the next semicolon or the end of the line left = value.indexOf( subField + "=" ); if ( left >= 0 ) { left += subField.length() + 1; } } if ( left >= 0 ) { right = value.indexOf( ";", left ); if ( right < 0 ) { right = value.length(); } // Get the parameter parameter = value.mid( left, ( right - left ) ); return parameter; } } return QString::null; } // Get a value given a field QString MimeMessage::getValue(const QString& field ) const { // Search the fields for a match for( int i = 0; i < fields_.count(); i++ ) { if ( fields_[i] == field ) { return values_[i]; } } kmWarning() << "This message contained no field \"" << field << "\"."; return QString(); } // Test whether a given field exists in the message header bool MimeMessage::hasField(const QString& field) const { // Search the fields for a match for( int i = 0; i < fields_.count(); i++ ) { if ( fields_[i] == field ) { return true; } } return false; } // Parse the message into type, body, and fields and values 
void MimeMessage::parseMessage(const QString& message) { QString head, field, value; QStringList lines; // Split the message into head and body // Split the head into its various lines splitMessage( head, body_, message ); splitHead( lines, head ); // Split all the lines into field and value for( int i = 0; i < lines.count(); i++ ) { if( lines[i].startsWith("\t") ) { #ifdef KMESSTEST KMESS_ASSERT( fields_.last() == "Received" ); #endif // Addition to parse email headers, with Received: headers that continue at // the next line values_[ values_.count() - 1 ] += "\r\n" + lines[i].mid(2); } else { splitLine( field, value, lines[i] ); if ( !field.isEmpty() ) { // Add the fields and values to the lists addField( field, value ); } } } #ifdef KMESSTEST KMESS_ASSERT( fields_.count() == values_.count() ); #endif } // Print the contents of the message to kDebug (for debugging purposes) void MimeMessage::print() const { kmDebug() << "Printing MIME message, " << fields_.count() << " fields."; // Get the fields and values for( int i = 0; i < fields_.count(); i++ ) { kmDebug() << "Field: \"" << fields_[i] << "\" value: \"" << values_[i] << "\""; } if( binaryBody_.isEmpty() ) { kmDebug() << "Body:" << endl << body_; } else { kmDebug() << "Body: (" << binaryBody_.size() << " bytes of binary data)"; } } // Set the message body void MimeMessage::setBody(const QString& body) { body_ = body; } // Set the P2P data of the message void MimeMessage::setBinaryBody(const QByteArray &header, const QByteArray &body, const char footer[4]) { int headerSize = header.size(); int bodySize = body.size(); int footerPos = headerSize + bodySize; // Footer needs to be wrapped in a QByteArray // or replace( start, end, char* ) will reduce the size of the footer (Qt 4.3.3) QByteArray footerWrapper( footer, 4 ); // So much easier with Qt4! 
binaryBody_.resize( footerPos + 4 ); binaryBody_.replace( 0, headerSize, header ); binaryBody_.replace( headerSize, bodySize, body ); binaryBody_.replace( footerPos, 4, footerWrapper ); #ifdef KMESSTEST KMESS_ASSERT( binaryBody_.size() >= 52 ); // 48 bytes header, 4 bytes footer. #endif } // Set the P2P data of the message void MimeMessage::setBinaryBody(const QByteArray &data) { binaryBody_ = data; } // Split a line between field and value void MimeMessage::splitLine(QString& field, QString& value, const QString& line) const { if ( line.isEmpty() ) { return; } int index; index = line.indexOf(":"); if ( index >= 0 ) { field = line.left( index ); if ( ( index + 1 ) < (int)line.length() ) { value = line.right( line.length() - index - 2 ); } else { value = QString::null; } } else { kmWarning() << "Couldn't split line '" << line << "'"; } } // Split the message head into components and store it in the string list void MimeMessage::splitHead(QStringList& stringList, const QString& head) const { // adam (16 Oct 10) - split on \n only // the OfflineIM service is doing something funky and only // terminating lines with \n. If we split on \n, then trim() the string, // we'll strip out any \r's as well. stringList = head.split("\n"); for( int i = 0; i < stringList.count(); i++ ) { // special case for email headers (apparently?) if ( stringList[i].startsWith( "\t" ) ) continue; stringList[i] = stringList[i].trimmed(); } } // Split a message into head and body void MimeMessage::splitMessage(QString& head, QString& body, const QString& message) const { int index; int count; // The message is split into head and body at "\r\n\r\n" index = message.indexOf( "\r\n\r\n" ); count = 4; // Offline IM MIME messages don't use \r\n anymore? // wtf? 
(adam - 16 Oct 10) if ( index < 0 ) { index = message.indexOf("\n\n"); count = 2; } if ( index < 0 ) { head = message; body = ""; } else { head = message.left( index + ( count / 2 ) ); // Keep a "\r\n" at the end body = message.right( message.length() - index - count ); } }