77.39% (493/637)

Uncovered changed code (with context):

================================================================================
src/Storages/MergeTree/IMergeTreeReader.cpp
================================================================================

--- uncovered block 181-181 ---
      179 |             + " of type " + part_storage->getDiskType() + ")");
      180 |         throw;
>>    181 |     }
      182 | }
      183 | 
================================================================================
src/Storages/MergeTree/MergeTreeIndexConditionText.cpp
================================================================================

--- uncovered block 74-76 ---
       72 |         {
       73 |             if (const auto & re2 = pattern.getRE2())
>>     74 |             {
>>     75 |                 hash.update(re2->pattern());
>>     76 |             }
       77 |             else
       78 |             {

--- uncovered block 536-536 ---
      534 |     const String value = preprocessor->processConstant(field.safeGet<String>());
      535 |     if (value.empty())
>>    536 |         return {};
      537 | 
      538 |     const char * data = value.data();

--- uncovered block 548-548 ---
      546 |     /// Must start with at least one '%'.
      547 |     if (data[pos] != '%')
>>    548 |         return {};
      549 | 
      550 |     while (pos < length && data[pos] == '%')
================================================================================
src/Storages/MergeTree/MergeTreeIndexText.cpp
================================================================================

--- uncovered block 475-478 ---
      473 |     auto sparse_index = loadSparseIndex(header_stream, state);
      474 |     if (sparse_index->empty())
>>    475 |     {
>>    476 |         can_use_like_dictionary_scan = true;
>>    477 |         return;
>>    478 |     }
      479 | 
      480 |     const size_t max_postings_to_read = condition_text.getContext()->getSettingsRef()[Setting::text_index_like_max_postings_to_read];

--- uncovered block 526-526 ---
      524 | 
      525 |         if (postings_to_read >= max_postings_to_read)
>>    526 |         {
      527 |             /// Too many large-posting tokens matched.
      528 |             /// Not all dictionary blocks were scanned, so the set of matched pattern tokens is incomplete.

--- uncovered block 529-530 ---
      527 |             /// Too many large-posting tokens matched.
      528 |             /// Not all dictionary blocks were scanned, so the set of matched pattern tokens is incomplete.
>>    529 |             return;
>>    530 |         }
      531 |     }
      532 | 

--- uncovered block 538-538 ---
      536 |         {
      537 |             if (query->patterns.empty())
>>    538 |                 continue;
      539 | 
      540 |             if (std::ranges::any_of(

--- uncovered block 649-653 ---
      647 | 
      648 | size_t MergeTreeIndexGranuleText::memoryUsageBytes() const
>>    649 | {
>>    650 |     size_t memory_usage_bytes = sizeof(*this)
>>    651 |         + remaining_tokens.capacity() * sizeof(*remaining_tokens.begin())
>>    652 |         + rare_tokens_postings.capacity() * sizeof(*rare_tokens_postings.begin())
>>    653 |         + pattern_tokens.capacity() * sizeof(*pattern_tokens.begin());
      654 | 
      655 |     for (const auto & [hash, tokens_vec] : pattern_tokens_per_query)

--- uncovered block 655-656 ---
      653 |         + pattern_tokens.capacity() * sizeof(*pattern_tokens.begin());
      654 | 
>>    655 |     for (const auto & [hash, tokens_vec] : pattern_tokens_per_query)
>>    656 |         memory_usage_bytes += tokens_vec.capacity() * sizeof(String);
      657 | 
      658 |     return memory_usage_bytes;

--- uncovered block 658-659 ---
      656 |         memory_usage_bytes += tokens_vec.capacity() * sizeof(String);
      657 | 
>>    658 |     return memory_usage_bytes;
>>    659 | }
      660 | 
      661 | bool MergeTreeIndexGranuleText::hasAnyQueryTokens(const TextSearchQuery & query) const

--- uncovered block 755-755 ---
      753 | {
      754 |     if (query.patterns.empty())
>>    755 |         return false;
      756 | 
      757 |     /// If the dictionary scan was cut short (too many large-posting tokens), the set of

--- uncovered block 760-760 ---
      758 |     /// matched pattern tokens is incomplete. Conservatively assume the pattern may match.
      759 |     if (!can_use_like_dictionary_scan)
>>    760 |         return true;
      761 | 
      762 |     const auto & query_tokens = getPatternTokensForTextQuery(query);

--- uncovered block 765-765 ---
      763 | 
      764 |     if (!current_range.has_value())
>>    765 |         return !query_tokens.empty();
      766 | 
      767 |     PostingList range_posting;

--- uncovered block 783-783 ---
      781 | 
      782 |         if (!has_any_range)
>>    783 |             continue;
      784 | 
      785 |         /// We read postings only for tokens that have one block.
================================================================================
src/Storages/MergeTree/MergeTreeIndexText.h
================================================================================

--- uncovered block 311-311 ---
      309 |     const std::vector<String> & getPatternTokensForTextQuery(const TextSearchQuery & query) const;
      310 |     PostingListPtr getPostingsForRareToken(std::string_view token) const;
>>    311 |     bool canUseLikeDictionaryScan() const { return can_use_like_dictionary_scan; }
      312 |     void setCurrentRange(RowsRange range) { current_range = std::move(range); }
      313 |     const String & getIndexIdForCaches() const { return index_id_for_caches; }
================================================================================
src/Storages/MergeTree/MergeTreeReaderTextIndex.cpp
================================================================================

--- uncovered block 97-97 ---
       95 |     const auto & condition_text = assert_cast<const MergeTreeIndexConditionText &>(*index.condition);
       96 |     if (!condition_text.getAllSearchPatterns().empty())
>>     97 |     {
       98 |         /// Build a fallback evaluation path for when the dictionary scan is cut short
       99 |         /// (too many pattern-matching tokens exceed text_index_like_max_postings_to_read).

--- uncovered block 106-107 ---
      104 |         /// (the original search predicate) and determine the required physical columns
      105 |         /// from it. The fallback reader is then created for those physical columns only.
>>    106 |         auto context_copy = createContextForDefaultExpressions();
>>    107 |         auto combined_columns = buildCombinedColumnsForDefaultExpressions();
      108 | 
      109 |         /// Build a header block containing all physical columns (column type only, no data).

--- uncovered block 113-115 ---
      111 |         /// a StorageDummy from it — StorageDummy requires at least one column, so the header
      112 |         /// must be non-empty.
>>    113 |         Block physical_header;
>>    114 |         for (const auto & phys_col : storage_snapshot->metadata->getColumns().getAllPhysical())
>>    115 |             physical_header.insert({phys_col.type->createColumn(), phys_col.type, phys_col.name});
      116 | 
      117 |         NameSet fallback_columns_set;

--- uncovered block 117-122 ---
      115 |             physical_header.insert({phys_col.type->createColumn(), phys_col.type, phys_col.name});
      116 | 
>>    117 |         NameSet fallback_columns_set;
>>    118 |         for (const auto & col : columns_)
>>    119 |         {
>>    120 |             auto search_query = condition_text.getSearchQueryForVirtualColumn(col.name);
>>    121 |             if (!search_query || search_query->patterns.empty())
>>    122 |                 continue;
      123 | 
      124 |             /// Compile the virtual column's default expression (the original search predicate).

--- uncovered block 127-130 ---
      125 |             /// We pass a header with all physical columns so that createExpressionsAnalyzer
      126 |             /// can build a non-empty StorageDummy (it requires at least one column).
>>    127 |             NamesAndTypesList need_col{{col.name, col.type}};
>>    128 |             auto dag = DB::evaluateMissingDefaults(physical_header, need_col, combined_columns, context_copy);
>>    129 |             if (!dag)
>>    130 |                 continue;
      131 | 
      132 |             dag->addMaterializingOutputActions(/*materialize_sparse=*/ false);

--- uncovered block 132-134 ---
      130 |                 continue;
      131 | 
>>    132 |             dag->addMaterializingOutputActions(/*materialize_sparse=*/ false);
>>    133 |             auto actions = std::make_shared<ExpressionActions>(
>>    134 |                 std::move(*dag), ExpressionActionsSettings(context_copy->getSettingsRef()));
      135 | 
      136 |             /// Collect the physical columns this expression requires.

--- uncovered block 137-141 ---
      135 | 
      136 |             /// Collect the physical columns this expression requires.
>>    137 |             for (const auto & req : actions->getRequiredColumnsWithTypes())
>>    138 |             {
>>    139 |                 if (fallback_columns_set.insert(req.name).second)
>>    140 |                     fallback_columns_list.push_back(req);
>>    141 |             }
      142 | 
      143 |             fallback_expressions.emplace(col.name, std::move(actions));

--- uncovered block 143-144 ---
      141 |             }
      142 | 
>>    143 |             fallback_expressions.emplace(col.name, std::move(actions));
>>    144 |         }
      145 | 
      146 |         if (!fallback_columns_list.empty())

--- uncovered block 146-162 ---
      144 |         }
      145 | 
>>    146 |         if (!fallback_columns_list.empty())
>>    147 |         {
>>    148 |             fallback_reader = createMergeTreeReader(
>>    149 |                 main_reader_->data_part_info_for_read,
>>    150 |                 fallback_columns_list,
>>    151 |                 main_reader_->storage_snapshot,
>>    152 |                 main_reader_->storage_settings,
>>    153 |                 main_reader_->all_mark_ranges,
>>    154 |                 /*virtual_fields=*/{},
>>    155 |                 main_reader_->uncompressed_cache,
>>    156 |                 main_reader_->mark_cache,
>>    157 |                 /*deserialization_prefixes_cache=*/nullptr,
>>    158 |                 main_reader_->settings,
>>    159 |                 /*avg_value_size_hints=*/{},
>>    160 |                 /*profile_callback=*/{});
>>    161 |         }
>>    162 |     }
      163 | }
      164 | 

--- uncovered block 170-170 ---
      168 | 
      169 |     if (fallback_reader)
>>    170 |         fallback_reader->updateAllMarkRanges(ranges);
      171 | 
      172 |     if (granule && !ranges.empty())

--- uncovered block 231-233 ---
      229 |         /// Always return true for empty needles.
      230 |         if (search_query->tokens.empty() && search_query->patterns.empty())
>>    231 |         {
>>    232 |             is_always_true[i] = true;
>>    233 |         }
      234 |         else if (!search_query->patterns.empty())
      235 |         {

--- uncovered block 235-238 ---
      233 |         }
      234 |         else if (!search_query->patterns.empty())
>>    235 |         {
>>    236 |             if (!granule_text.canUseLikeDictionaryScan())
>>    237 |             {
>>    238 |                 if (!fallback_reader)
      239 |                     throw Exception(ErrorCodes::LOGICAL_ERROR, "The fallback reader for patterns is not initialized.");
      240 | 

--- uncovered block 241-249 ---
      239 |                     throw Exception(ErrorCodes::LOGICAL_ERROR, "The fallback reader for patterns is not initialized.");
      240 | 
>>    241 |                 use_fallback[i] = true;
>>    242 |             }
>>    243 |             else
>>    244 |             {
>>    245 |                 const auto & pattern_tokens = granule_text.getPatternTokensForTextQuery(*search_query);
>>    246 |                 for (const auto & token : pattern_tokens)
>>    247 |                     useful_tokens.insert(token);
>>    248 |             }
>>    249 |         }
      250 |         else if (search_query->direct_read_mode == TextIndexDirectReadMode::Exact)
      251 |         {

--- uncovered block 305-307 ---
      303 | 
      304 |     for (const auto & [token, token_info] : pattern_tokens)
>>    305 |     {
>>    306 |         if (granule_text.getPostingsForRareToken(token) || !useful_tokens.contains(token))
>>    307 |             continue;
      308 | 
      309 |         large_postings_streams.emplace(token, make_stream());

--- uncovered block 309-310 ---
      307 |             continue;
      308 | 
>>    309 |         large_postings_streams.emplace(token, make_stream());
>>    310 |     }
      311 | }
      312 | 

--- uncovered block 391-397 ---
      389 |     Block fallback_block;
      390 |     if (any_use_fallback && fallback_reader && max_rows_to_read > 0)
>>    391 |     {
>>    392 |         Columns fallback_cols(fallback_columns_list.size(), nullptr);
>>    393 |         fallback_reader->readRows(from_mark, current_task_last_mark, continue_reading, max_rows_to_read, rows_offset, fallback_cols);
>>    394 |         size_t col_idx = 0;
>>    395 |         for (const auto & col_name_type : fallback_columns_list)
>>    396 |             fallback_block.insert({fallback_cols[col_idx++], col_name_type.type, col_name_type.name});
>>    397 |     }
      398 | 
      399 |     size_t fallback_offset = 0;

--- uncovered block 437-444 ---
      435 |                 }
      436 |                 else if (use_fallback[i] && !fallback_block.empty())
>>    437 |                 {
>>    438 |                     fillColumnFallback(
>>    439 |                         column_mutable,
>>    440 |                         columns_to_read[i].name,
>>    441 |                         fallback_block,
>>    442 |                         fallback_offset,
>>    443 |                         rows_to_read);
>>    444 |                 }
      445 |                 else
      446 |                 {

--- uncovered block 525-527 ---
      523 |             double cardinality = 1.0;
      524 | 
>>    525 |             for (const auto & token : query.tokens)
>>    526 |             {
>>    527 |                 auto it = remaining_tokens.find(token);
      528 |                 /// Same reasoning as above: absent from sparse index ⟹ too common ⟹ treat as all rows.
      529 |                 double token_cardinality = it == remaining_tokens.end() ? static_cast<double>(total_rows) : it->second->cardinality;

--- uncovered block 529-531 ---
      527 |                 auto it = remaining_tokens.find(token);
      528 |                 /// Same reasoning as above: absent from sparse index ⟹ too common ⟹ treat as all rows.
>>    529 |                 double token_cardinality = it == remaining_tokens.end() ? static_cast<double>(total_rows) : it->second->cardinality;
>>    530 |                 cardinality *= (1.0 - (token_cardinality / static_cast<double>(total_rows)));
>>    531 |             }
      532 | 
      533 |             cardinality = static_cast<double>(total_rows) * (1.0 - cardinality);

--- uncovered block 590-590 ---
      588 |     /// Read postings for pattern-matched tokens
      589 |     for (const auto & [token, token_info] : pattern_tokens)
>>    590 |         read_postings_if_needed(token, token_info);
      591 | 
      592 |     return result;

--- uncovered block 646-646 ---
      644 | 
      645 |     for (const auto & [token, token_info] : pattern_tokens)
>>    646 |         cleanup_postings(token, token_info);
      647 | }
      648 | 

--- uncovered block 750-755 ---
      748 | 
      749 |     if (!search_query->patterns.empty())
>>    750 |     {
>>    751 |         const auto & granule_text = assert_cast<const MergeTreeIndexGranuleText &>(*granule);
>>    752 |         std::vector<String> matched_tokens;
>>    753 |         for (const auto & token : granule_text.getPatternTokensForTextQuery(*search_query))
>>    754 |             if (postings.contains(token))
>>    755 |                 matched_tokens.push_back(String(token));
      756 | 
      757 |         if (!matched_tokens.empty())

--- uncovered block 757-758 ---
      755 |                 matched_tokens.push_back(String(token));
      756 | 
>>    757 |         if (!matched_tokens.empty())
>>    758 |             applyPostingsAny(column, postings, indices_buffer, matched_tokens, old_size, row_offset, num_rows);
      759 | 
      760 |         return;

--- uncovered block 760-761 ---
      758 |             applyPostingsAny(column, postings, indices_buffer, matched_tokens, old_size, row_offset, num_rows);
      759 | 
>>    760 |         return;
>>    761 |     }
      762 | 
      763 |     if (postings.empty())

--- uncovered block 767-769 ---
      765 | 
      766 |     if (search_query->tokens.empty())
>>    767 |     {
>>    768 |         return;
>>    769 |     }
      770 |     else if (search_query->search_mode == TextSearchMode::Any)
      771 |     {

--- uncovered block 790-792 ---
      788 |     size_t offset,
      789 |     size_t num_rows) const
>>    790 | {
>>    791 |     auto it = fallback_expressions.find(column_name);
>>    792 |     chassert(it != fallback_expressions.end());
      793 | 
      794 |     /// Build a block slice for this granule: cut [offset, offset + num_rows) from each physical column.

--- uncovered block 795-797 ---
      793 | 
      794 |     /// Build a block slice for this granule: cut [offset, offset + num_rows) from each physical column.
>>    795 |     Block slice;
>>    796 |     for (const auto & col : physical_block)
>>    797 |         slice.insert({col.column->cut(offset, num_rows), col.type, col.name});
      798 | 
      799 |     /// Execute the virtual column's default expression (the original search predicate) on the slice.

--- uncovered block 801-801 ---
      799 |     /// Execute the virtual column's default expression (the original search predicate) on the slice.
      800 |     /// After execution the block contains both the physical columns and the computed virtual column.
>>    801 |     it->second->execute(slice);
      802 | 
      803 |     const auto & result_col = slice.getByName(column_name);

--- uncovered block 803-805 ---
      801 |     it->second->execute(slice);
      802 | 
>>    803 |     const auto & result_col = slice.getByName(column_name);
>>    804 |     const auto & result_data = assert_cast<const ColumnUInt8 &>(*result_col.column).getData();
>>    805 |     chassert(result_data.size() == num_rows);
      806 | 
      807 |     auto & column_data = assert_cast<ColumnUInt8 &>(column).getData();

--- uncovered block 807-811 ---
      805 |     chassert(result_data.size() == num_rows);
      806 | 
>>    807 |     auto & column_data = assert_cast<ColumnUInt8 &>(column).getData();
>>    808 |     const size_t old_size = column_data.size();
>>    809 |     column_data.resize(old_size + num_rows);
>>    810 |     memcpy(&column_data[old_size], result_data.data(), num_rows);
>>    811 | }
      812 | 
      813 | MergeTreeReaderPtr createMergeTreeReaderTextIndex(

=== Lost Baseline Coverage: 7 lines ===

================================================================================
src/Storages/MergeTree/MergeTreeIndexConditionText.cpp
================================================================================

--- lost coverage block 60-60 ---
       58 |     hash.update(search_mode);
       59 |     hash.update(direct_read_mode);
>>     60 | 
       61 |     hash.update(tokens.size());
       62 |     for (const auto & token : tokens)

--- lost coverage block 744-744 ---
      742 |         /// like/notLike optimization is only supported for the SplitByNonAlpha tokenizer.
      743 |         /// Requires explicit opt-in via use_text_index_like_evaluation_by_dictionary_scan because scanning
>>    744 |         /// the index dictionary for pattern-matching tokens has non-trivial overhead.
      745 |         if (tokenizer->getType() == ITokenizer::Type::SplitByNonAlpha && settings[Setting::use_text_index_like_evaluation_by_dictionary_scan]
      746 |             && !has_preprocessor)
================================================================================
src/Storages/MergeTree/MergeTreeReaderTextIndex.cpp
================================================================================

--- lost coverage block 561-561 ---
      559 |     const auto & pattern_tokens = granule_text.getPatternTokens();
      560 |     PostingsMap result;
>>    561 | 
      562 |     const auto read_postings_if_needed = [&](const String & token, const TokenPostingsInfoPtr & token_info)
      563 |     {

--- lost coverage block 566-566 ---
      564 |         if (!useful_tokens.contains(token))
      565 |             return;
>>    566 | 
      567 |         auto token_postings = readPostingsBlocksForToken(token, *token_info, *rows_range);
      568 | 

--- lost coverage block 628-628 ---
      626 |     const auto & remaining_tokens = granule_text.getRemainingTokens();
      627 |     const auto & pattern_tokens = granule_text.getPatternTokens();
>>    628 | 
      629 |     const auto cleanup_postings = [&](const String & token, const TokenPostingsInfoPtr & token_info)
      630 |     {

--- lost coverage block 752-752 ---
      750 |     {
      751 |         const auto & granule_text = assert_cast<const MergeTreeIndexGranuleText &>(*granule);
>>    752 |         std::vector<String> matched_tokens;
      753 |         for (const auto & token : granule_text.getPatternTokensForTextQuery(*search_query))
      754 |             if (postings.contains(token))

--- lost coverage block 811-811 ---
      809 |     column_data.resize(old_size + num_rows);
      810 |     memcpy(&column_data[old_size], result_data.data(), num_rows);
>>    811 | }
      812 | 
      813 | MergeTreeReaderPtr createMergeTreeReaderTextIndex(
WARNING: Failed to get start time for [Print Uncovered Code] - start time and duration won't be set

--- Coverage counts ---
Lines     : baseline 739,656/880,108  →  current 739,969/880,507  (Δ +313 / +399)
Functions : baseline 798,440/878,648  →  current 798,518/878,658  (Δ +78 / +10)
Branches  : baseline 239,765/313,164  →  current 239,896/313,374  (Δ +131 / +210)