现在的位置: 首页 > 综合 > 正文

QT分析之WebKit(七)

2013年09月07日 ⁄ 综合 ⁄ 共 6840字 ⁄ 字号 评论关闭

程序人生 2010-02-25 15:55:04 阅读335 评论0   字号: 订阅

接着前面的分析,先看m_decoder->decode(str, len);
String TextResourceDecoder::decode(const char* data, size_t len)
{
    if (!m_checkedForBOM)
        checkForBOM(data, len);  // 检查是否为Unicode编码

    bool movedDataToBuffer = false;

    if (m_contentType == CSS && !m_checkedForCSSCharset)
        if (!checkForCSSCharset(data, len, movedDataToBuffer))  // 如果是CSS,则检查CSS的字符集
            return "";

    if ((m_contentType == HTML || m_contentType == XML) && !m_checkedForHeadCharset) // HTML and XML
        if (!checkForHeadCharset(data, len, movedDataToBuffer))  // 检查HTML/XML的字符集
            return "";

    // Do the auto-detect if our default encoding is one of the Japanese ones.
    // FIXME: It seems wrong to change our encoding downstream after we have already done some decoding.
    if (m_source != UserChosenEncoding && m_source != AutoDetectedEncoding && encoding().isJapanese())
        detectJapaneseEncoding(data, len);  // 检查日文编码(为什么没有检查中文编码的啊?)

    ASSERT(encoding().isValid());

    if (m_buffer.isEmpty())
        return m_decoder.decode(data, len, false, m_contentType == XML, m_sawError);

    if (!movedDataToBuffer) {
        size_t oldSize = m_buffer.size();
        m_buffer.grow(oldSize + len);
        memcpy(m_buffer.data() + oldSize, data, len);
    }

    String result = m_decoder.decode(m_buffer.data(), m_buffer.size(), false, m_contentType == XML, m_sawError);
    m_buffer.clear();
    return result;
}
再回到tokenizer->write(decoded, true);看其具体实现:
bool HTMLTokenizer::write(const SegmentedString& str, bool appendData)
{
    if (!m_buffer)
        return false;
   
    if (m_parserStopped)
        return false;

    SegmentedString source(str);
    if (m_executingScript)
        source.setExcludeLineNumbers();

    if ((m_executingScript && appendData) || !m_pendingScripts.isEmpty()) {
        // don't parse; we will do this later
        if (m_currentPrependingSrc)
            m_currentPrependingSrc->append(source);
        else {
            m_pendingSrc.append(source);
#if PRELOAD_SCANNER_ENABLED
            if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
                m_preloadScanner->write(source);
#endif
        }
        return false;
    }
   
#if PRELOAD_SCANNER_ENABLED
    if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
        m_preloadScanner->end();
#endif

    if (!m_src.isEmpty())
        m_src.append(source);
    else
        setSrc(source);

    // Once a timer is set, it has control of when the tokenizer continues.
    if (m_timer.isActive())
        return false;

    bool wasInWrite = m_inWrite;
    m_inWrite = true;
   
#ifdef INSTRUMENT_LAYOUT_SCHEDULING
    if (!m_doc->ownerElement())
        printf("Beginning write at time %d ", m_doc->elapsedTime());
#endif
   
    int processedCount = 0;
    double startTime = currentTime();

    Frame* frame = m_doc->frame();

    State state = m_state;

    while (!m_src.isEmpty() && (!frame || !frame->loader()->isScheduledLocationChangePending())) {
        if (!continueProcessing(processedCount, startTime, state))
            break;

        // do we need to enlarge the buffer?
        checkBuffer();

        UChar cc = *m_src;

        bool wasSkipLF = state.skipLF();
        if (wasSkipLF)
            state.setSkipLF(false);

        if (wasSkipLF && (cc == ' '))
            m_src.advance();
        else if (state.needsSpecialWriteHandling()) {
            // it's important to keep needsSpecialWriteHandling with the flags this block tests
            if (state.hasEntityState())
                state = parseEntity(m_src, m_dest, state, m_cBufferPos, false, state.hasTagState());
            else if (state.inPlainText())
                state = parseText(m_src, state);
            else if (state.inAnySpecial())
                state = parseSpecial(m_src, state);
            else if (state.inComment())
                state = parseComment(m_src, state);
            else if (state.inDoctype())
                state = parseDoctype(m_src, state);
            else if (state.inServer())
                state = parseServer(m_src, state);
            else if (state.inProcessingInstruction())
                state = parseProcessingInstruction(m_src, state);
            else if (state.hasTagState())
                state = parseTag(m_src, state);
            else if (state.startTag()) {
                state.setStartTag(false);
               
                switch(cc) {
                case '/':
                    break;
                case '!': {
                    // or
                    searchCount = 1; // Look for '                    m_doctypeSearchCount = 1;
                    break;
                }
                case '?': {
                    // xml processing instruction
                    state.setInProcessingInstruction(true);
                    tquote = NoQuote;
                    state = parseProcessingInstruction(m_src, state);
                    continue;

                    break;
                }
                case '%':
                    if (!m_brokenServer) {
                        // <% server stuff, handle as comment %>
                        state.setInServer(true);
                        tquote = NoQuote;
                        state = parseServer(m_src, state);
                        continue;
                    }
                    // else fall through
                default: {
                    if( ((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z'))) {
                        // Start of a Start-Tag
                    } else {
                        // Invalid tag
                        // Add as is
                        *m_dest = '<';
                        m_dest++;
                        continue;
                    }
                }
                }; // end case

                processToken();

                m_cBufferPos = 0;
                state.setTagState(TagName);
                state = parseTag(m_src, state);
            }
        } else if (cc == '&' && !m_src.escaped()) {
            m_src.advancePastNonNewline();
            state = parseEntity(m_src, m_dest, state, m_cBufferPos, true, state.hasTagState());
        } else if (cc == '<' && !m_src.escaped()) {
            m_currentTagStartLineNumber = m_lineNumber;
            m_src.advancePastNonNewline();
            state.setStartTag(true);
            state.setDiscardLF(false);
        } else if (cc == ' ' || cc == ' ') {
            if (state.discardLF())
                // Ignore this LF
                state.setDiscardLF(false); // We have discarded 1 LF
            else {
                // Process this LF
                *m_dest++ = ' ';
                if (cc == ' ' && !m_src.excludeLineNumbers())
                    m_lineNumber++;
            }

            /* Check for MS-DOS CRLF sequence */
            if (cc == ' ')
                state.setSkipLF(true);
            m_src.advance(m_lineNumber);
        } else {
            state.setDiscardLF(false);
            *m_dest++ = cc;
            m_src.advancePastNonNewline();
        }
    }
   
#ifdef INSTRUMENT_LAYOUT_SCHEDULING
    if (!m_doc->ownerElement())
        printf("Ending write at time %d ", m_doc->elapsedTime());
#endif
   
    m_inWrite = wasInWrite;

    m_state = state;

    if (m_noMoreData && !m_inWrite && !state.loadingExtScript() && !m_executingScript && !m_timer.isActive()) {
        end(); // this actually causes us to be deleted
        return true;
    }
    return false;
}
在调用的时候,因为调用参数decoded是String类型的,所以先隐含转化成SegmentedString。SegmentedString可以附带行号,也可以不带行号(可以设定)。上面程序中的while循环主体,就是一个分析程序主体。

抱歉!评论已关闭.