77.39% (493/637) Uncovered changed code (with context): ================================================================================ src/Storages/MergeTree/IMergeTreeReader.cpp ================================================================================ --- uncovered block 181-181 --- 179 | + " of type " + part_storage->getDiskType() + ")"); 180 | throw; >> 181 | } 182 | } 183 | ================================================================================ src/Storages/MergeTree/MergeTreeIndexConditionText.cpp ================================================================================ --- uncovered block 74-76 --- 72 | { 73 | if (const auto & re2 = pattern.getRE2()) >> 74 | { >> 75 | hash.update(re2->pattern()); >> 76 | } 77 | else 78 | { --- uncovered block 536-536 --- 534 | const String value = preprocessor->processConstant(field.safeGet()); 535 | if (value.empty()) >> 536 | return {}; 537 | 538 | const char * data = value.data(); --- uncovered block 548-548 --- 546 | /// Must start with at least one '%'. 547 | if (data[pos] != '%') >> 548 | return {}; 549 | 550 | while (pos < length && data[pos] == '%') ================================================================================ src/Storages/MergeTree/MergeTreeIndexText.cpp ================================================================================ --- uncovered block 475-478 --- 473 | auto sparse_index = loadSparseIndex(header_stream, state); 474 | if (sparse_index->empty()) >> 475 | { >> 476 | can_use_like_dictionary_scan = true; >> 477 | return; >> 478 | } 479 | 480 | const size_t max_postings_to_read = condition_text.getContext()->getSettingsRef()[Setting::text_index_like_max_postings_to_read]; --- uncovered block 526-526 --- 524 | 525 | if (postings_to_read >= max_postings_to_read) >> 526 | { 527 | /// Too many large-posting tokens matched. 528 | /// Not all dictionary blocks were scanned, so the set of matched pattern tokens is incomplete. --- uncovered block 529-530 --- 527 | /// Too many large-posting tokens matched. 528 | /// Not all dictionary blocks were scanned, so the set of matched pattern tokens is incomplete. >> 529 | return; >> 530 | } 531 | } 532 | --- uncovered block 538-538 --- 536 | { 537 | if (query->patterns.empty()) >> 538 | continue; 539 | 540 | if (std::ranges::any_of( --- uncovered block 649-653 --- 647 | 648 | size_t MergeTreeIndexGranuleText::memoryUsageBytes() const >> 649 | { >> 650 | size_t memory_usage_bytes = sizeof(*this) >> 651 | + remaining_tokens.capacity() * sizeof(*remaining_tokens.begin()) >> 652 | + rare_tokens_postings.capacity() * sizeof(*rare_tokens_postings.begin()) >> 653 | + pattern_tokens.capacity() * sizeof(*pattern_tokens.begin()); 654 | 655 | for (const auto & [hash, tokens_vec] : pattern_tokens_per_query) --- uncovered block 655-656 --- 653 | + pattern_tokens.capacity() * sizeof(*pattern_tokens.begin()); 654 | >> 655 | for (const auto & [hash, tokens_vec] : pattern_tokens_per_query) >> 656 | memory_usage_bytes += tokens_vec.capacity() * sizeof(String); 657 | 658 | return memory_usage_bytes; --- uncovered block 658-659 --- 656 | memory_usage_bytes += tokens_vec.capacity() * sizeof(String); 657 | >> 658 | return memory_usage_bytes; >> 659 | } 660 | 661 | bool MergeTreeIndexGranuleText::hasAnyQueryTokens(const TextSearchQuery & query) const --- uncovered block 755-755 --- 753 | { 754 | if (query.patterns.empty()) >> 755 | return false; 756 | 757 | /// If the dictionary scan was cut short (too many large-posting tokens), the set of --- uncovered block 760-760 --- 758 | /// matched pattern tokens is incomplete. Conservatively assume the pattern may match. 759 | if (!can_use_like_dictionary_scan) >> 760 | return true; 761 | 762 | const auto & query_tokens = getPatternTokensForTextQuery(query); --- uncovered block 765-765 --- 763 | 764 | if (!current_range.has_value()) >> 765 | return !query_tokens.empty(); 766 | 767 | PostingList range_posting; --- uncovered block 783-783 --- 781 | 782 | if (!has_any_range) >> 783 | continue; 784 | 785 | /// We read postings only for tokens that have one block. ================================================================================ src/Storages/MergeTree/MergeTreeIndexText.h ================================================================================ --- uncovered block 311-311 --- 309 | const std::vector & getPatternTokensForTextQuery(const TextSearchQuery & query) const; 310 | PostingListPtr getPostingsForRareToken(std::string_view token) const; >> 311 | bool canUseLikeDictionaryScan() const { return can_use_like_dictionary_scan; } 312 | void setCurrentRange(RowsRange range) { current_range = std::move(range); } 313 | const String & getIndexIdForCaches() const { return index_id_for_caches; } ================================================================================ src/Storages/MergeTree/MergeTreeReaderTextIndex.cpp ================================================================================ --- uncovered block 97-97 --- 95 | const auto & condition_text = assert_cast(*index.condition); 96 | if (!condition_text.getAllSearchPatterns().empty()) >> 97 | { 98 | /// Build a fallback evaluation path for when the dictionary scan is cut short 99 | /// (too many pattern-matching tokens exceed text_index_like_max_postings_to_read). --- uncovered block 106-107 --- 104 | /// (the original search predicate) and determine the required physical columns 105 | /// from it. The fallback reader is then created for those physical columns only. >> 106 | auto context_copy = createContextForDefaultExpressions(); >> 107 | auto combined_columns = buildCombinedColumnsForDefaultExpressions(); 108 | 109 | /// Build a header block containing all physical columns (column type only, no data). --- uncovered block 113-115 --- 111 | /// a StorageDummy from it — StorageDummy requires at least one column, so the header 112 | /// must be non-empty. >> 113 | Block physical_header; >> 114 | for (const auto & phys_col : storage_snapshot->metadata->getColumns().getAllPhysical()) >> 115 | physical_header.insert({phys_col.type->createColumn(), phys_col.type, phys_col.name}); 116 | 117 | NameSet fallback_columns_set; --- uncovered block 117-122 --- 115 | physical_header.insert({phys_col.type->createColumn(), phys_col.type, phys_col.name}); 116 | >> 117 | NameSet fallback_columns_set; >> 118 | for (const auto & col : columns_) >> 119 | { >> 120 | auto search_query = condition_text.getSearchQueryForVirtualColumn(col.name); >> 121 | if (!search_query || search_query->patterns.empty()) >> 122 | continue; 123 | 124 | /// Compile the virtual column's default expression (the original search predicate). --- uncovered block 127-130 --- 125 | /// We pass a header with all physical columns so that createExpressionsAnalyzer 126 | /// can build a non-empty StorageDummy (it requires at least one column). >> 127 | NamesAndTypesList need_col{{col.name, col.type}}; >> 128 | auto dag = DB::evaluateMissingDefaults(physical_header, need_col, combined_columns, context_copy); >> 129 | if (!dag) >> 130 | continue; 131 | 132 | dag->addMaterializingOutputActions(/*materialize_sparse=*/ false); --- uncovered block 132-134 --- 130 | continue; 131 | >> 132 | dag->addMaterializingOutputActions(/*materialize_sparse=*/ false); >> 133 | auto actions = std::make_shared( >> 134 | std::move(*dag), ExpressionActionsSettings(context_copy->getSettingsRef())); 135 | 136 | /// Collect the physical columns this expression requires. --- uncovered block 137-141 --- 135 | 136 | /// Collect the physical columns this expression requires. >> 137 | for (const auto & req : actions->getRequiredColumnsWithTypes()) >> 138 | { >> 139 | if (fallback_columns_set.insert(req.name).second) >> 140 | fallback_columns_list.push_back(req); >> 141 | } 142 | 143 | fallback_expressions.emplace(col.name, std::move(actions)); --- uncovered block 143-144 --- 141 | } 142 | >> 143 | fallback_expressions.emplace(col.name, std::move(actions)); >> 144 | } 145 | 146 | if (!fallback_columns_list.empty()) --- uncovered block 146-162 --- 144 | } 145 | >> 146 | if (!fallback_columns_list.empty()) >> 147 | { >> 148 | fallback_reader = createMergeTreeReader( >> 149 | main_reader_->data_part_info_for_read, >> 150 | fallback_columns_list, >> 151 | main_reader_->storage_snapshot, >> 152 | main_reader_->storage_settings, >> 153 | main_reader_->all_mark_ranges, >> 154 | /*virtual_fields=*/{}, >> 155 | main_reader_->uncompressed_cache, >> 156 | main_reader_->mark_cache, >> 157 | /*deserialization_prefixes_cache=*/nullptr, >> 158 | main_reader_->settings, >> 159 | /*avg_value_size_hints=*/{}, >> 160 | /*profile_callback=*/{}); >> 161 | } >> 162 | } 163 | } 164 | --- uncovered block 170-170 --- 168 | 169 | if (fallback_reader) >> 170 | fallback_reader->updateAllMarkRanges(ranges); 171 | 172 | if (granule && !ranges.empty()) --- uncovered block 231-233 --- 229 | /// Always return true for empty needles. 230 | if (search_query->tokens.empty() && search_query->patterns.empty()) >> 231 | { >> 232 | is_always_true[i] = true; >> 233 | } 234 | else if (!search_query->patterns.empty()) 235 | { --- uncovered block 235-238 --- 233 | } 234 | else if (!search_query->patterns.empty()) >> 235 | { >> 236 | if (!granule_text.canUseLikeDictionaryScan()) >> 237 | { >> 238 | if (!fallback_reader) 239 | throw Exception(ErrorCodes::LOGICAL_ERROR, "The fallback reader for patterns is not initialized."); 240 | --- uncovered block 241-249 --- 239 | throw Exception(ErrorCodes::LOGICAL_ERROR, "The fallback reader for patterns is not initialized."); 240 | >> 241 | use_fallback[i] = true; >> 242 | } >> 243 | else >> 244 | { >> 245 | const auto & pattern_tokens = granule_text.getPatternTokensForTextQuery(*search_query); >> 246 | for (const auto & token : pattern_tokens) >> 247 | useful_tokens.insert(token); >> 248 | } >> 249 | } 250 | else if (search_query->direct_read_mode == TextIndexDirectReadMode::Exact) 251 | { --- uncovered block 305-307 --- 303 | 304 | for (const auto & [token, token_info] : pattern_tokens) >> 305 | { >> 306 | if (granule_text.getPostingsForRareToken(token) || !useful_tokens.contains(token)) >> 307 | continue; 308 | 309 | large_postings_streams.emplace(token, make_stream()); --- uncovered block 309-310 --- 307 | continue; 308 | >> 309 | large_postings_streams.emplace(token, make_stream()); >> 310 | } 311 | } 312 | --- uncovered block 391-397 --- 389 | Block fallback_block; 390 | if (any_use_fallback && fallback_reader && max_rows_to_read > 0) >> 391 | { >> 392 | Columns fallback_cols(fallback_columns_list.size(), nullptr); >> 393 | fallback_reader->readRows(from_mark, current_task_last_mark, continue_reading, max_rows_to_read, rows_offset, fallback_cols); >> 394 | size_t col_idx = 0; >> 395 | for (const auto & col_name_type : fallback_columns_list) >> 396 | fallback_block.insert({fallback_cols[col_idx++], col_name_type.type, col_name_type.name}); >> 397 | } 398 | 399 | size_t fallback_offset = 0; --- uncovered block 437-444 --- 435 | } 436 | else if (use_fallback[i] && !fallback_block.empty()) >> 437 | { >> 438 | fillColumnFallback( >> 439 | column_mutable, >> 440 | columns_to_read[i].name, >> 441 | fallback_block, >> 442 | fallback_offset, >> 443 | rows_to_read); >> 444 | } 445 | else 446 | { --- uncovered block 525-527 --- 523 | double cardinality = 1.0; 524 | >> 525 | for (const auto & token : query.tokens) >> 526 | { >> 527 | auto it = remaining_tokens.find(token); 528 | /// Same reasoning as above: absent from sparse index ⟹ too common ⟹ treat as all rows. 529 | double token_cardinality = it == remaining_tokens.end() ? static_cast(total_rows) : it->second->cardinality; --- uncovered block 529-531 --- 527 | auto it = remaining_tokens.find(token); 528 | /// Same reasoning as above: absent from sparse index ⟹ too common ⟹ treat as all rows. >> 529 | double token_cardinality = it == remaining_tokens.end() ? static_cast(total_rows) : it->second->cardinality; >> 530 | cardinality *= (1.0 - (token_cardinality / static_cast(total_rows))); >> 531 | } 532 | 533 | cardinality = static_cast(total_rows) * (1.0 - cardinality); --- uncovered block 590-590 --- 588 | /// Read postings for pattern-matched tokens 589 | for (const auto & [token, token_info] : pattern_tokens) >> 590 | read_postings_if_needed(token, token_info); 591 | 592 | return result; --- uncovered block 646-646 --- 644 | 645 | for (const auto & [token, token_info] : pattern_tokens) >> 646 | cleanup_postings(token, token_info); 647 | } 648 | --- uncovered block 750-755 --- 748 | 749 | if (!search_query->patterns.empty()) >> 750 | { >> 751 | const auto & granule_text = assert_cast(*granule); >> 752 | std::vector matched_tokens; >> 753 | for (const auto & token : granule_text.getPatternTokensForTextQuery(*search_query)) >> 754 | if (postings.contains(token)) >> 755 | matched_tokens.push_back(String(token)); 756 | 757 | if (!matched_tokens.empty()) --- uncovered block 757-758 --- 755 | matched_tokens.push_back(String(token)); 756 | >> 757 | if (!matched_tokens.empty()) >> 758 | applyPostingsAny(column, postings, indices_buffer, matched_tokens, old_size, row_offset, num_rows); 759 | 760 | return; --- uncovered block 760-761 --- 758 | applyPostingsAny(column, postings, indices_buffer, matched_tokens, old_size, row_offset, num_rows); 759 | >> 760 | return; >> 761 | } 762 | 763 | if (postings.empty()) --- uncovered block 767-769 --- 765 | 766 | if (search_query->tokens.empty()) >> 767 | { >> 768 | return; >> 769 | } 770 | else if (search_query->search_mode == TextSearchMode::Any) 771 | { --- uncovered block 790-792 --- 788 | size_t offset, 789 | size_t num_rows) const >> 790 | { >> 791 | auto it = fallback_expressions.find(column_name); >> 792 | chassert(it != fallback_expressions.end()); 793 | 794 | /// Build a block slice for this granule: cut [offset, offset + num_rows) from each physical column. --- uncovered block 795-797 --- 793 | 794 | /// Build a block slice for this granule: cut [offset, offset + num_rows) from each physical column. >> 795 | Block slice; >> 796 | for (const auto & col : physical_block) >> 797 | slice.insert({col.column->cut(offset, num_rows), col.type, col.name}); 798 | 799 | /// Execute the virtual column's default expression (the original search predicate) on the slice. --- uncovered block 801-801 --- 799 | /// Execute the virtual column's default expression (the original search predicate) on the slice. 800 | /// After execution the block contains both the physical columns and the computed virtual column. >> 801 | it->second->execute(slice); 802 | 803 | const auto & result_col = slice.getByName(column_name); --- uncovered block 803-805 --- 801 | it->second->execute(slice); 802 | >> 803 | const auto & result_col = slice.getByName(column_name); >> 804 | const auto & result_data = assert_cast(*result_col.column).getData(); >> 805 | chassert(result_data.size() == num_rows); 806 | 807 | auto & column_data = assert_cast(column).getData(); --- uncovered block 807-811 --- 805 | chassert(result_data.size() == num_rows); 806 | >> 807 | auto & column_data = assert_cast(column).getData(); >> 808 | const size_t old_size = column_data.size(); >> 809 | column_data.resize(old_size + num_rows); >> 810 | memcpy(&column_data[old_size], result_data.data(), num_rows); >> 811 | } 812 | 813 | MergeTreeReaderPtr createMergeTreeReaderTextIndex( === Lost Baseline Coverage: 7 lines === ================================================================================ src/Storages/MergeTree/MergeTreeIndexConditionText.cpp ================================================================================ --- lost coverage block 60-60 --- 58 | hash.update(search_mode); 59 | hash.update(direct_read_mode); >> 60 | 61 | hash.update(tokens.size()); 62 | for (const auto & token : tokens) --- lost coverage block 744-744 --- 742 | /// like/notLike optimization is only supported for the SplitByNonAlpha tokenizer. 743 | /// Requires explicit opt-in via use_text_index_like_evaluation_by_dictionary_scan because scanning >> 744 | /// the index dictionary for pattern-matching tokens has non-trivial overhead. 745 | if (tokenizer->getType() == ITokenizer::Type::SplitByNonAlpha && settings[Setting::use_text_index_like_evaluation_by_dictionary_scan] 746 | && !has_preprocessor) ================================================================================ src/Storages/MergeTree/MergeTreeReaderTextIndex.cpp ================================================================================ --- lost coverage block 561-561 --- 559 | const auto & pattern_tokens = granule_text.getPatternTokens(); 560 | PostingsMap result; >> 561 | 562 | const auto read_postings_if_needed = [&](const String & token, const TokenPostingsInfoPtr & token_info) 563 | { --- lost coverage block 566-566 --- 564 | if (!useful_tokens.contains(token)) 565 | return; >> 566 | 567 | auto token_postings = readPostingsBlocksForToken(token, *token_info, *rows_range); 568 | --- lost coverage block 628-628 --- 626 | const auto & remaining_tokens = granule_text.getRemainingTokens(); 627 | const auto & pattern_tokens = granule_text.getPatternTokens(); >> 628 | 629 | const auto cleanup_postings = [&](const String & token, const TokenPostingsInfoPtr & token_info) 630 | { --- lost coverage block 752-752 --- 750 | { 751 | const auto & granule_text = assert_cast(*granule); >> 752 | std::vector matched_tokens; 753 | for (const auto & token : granule_text.getPatternTokensForTextQuery(*search_query)) 754 | if (postings.contains(token)) --- lost coverage block 811-811 --- 809 | column_data.resize(old_size + num_rows); 810 | memcpy(&column_data[old_size], result_data.data(), num_rows); >> 811 | } 812 | 813 | MergeTreeReaderPtr createMergeTreeReaderTextIndex( WARNING: Failed to get start time for [Print Uncovered Code] - start time and duration won't be set --- Coverage counts --- Lines : baseline 739,656/880,108 → current 739,969/880,507 (Δ +313 / +399) Functions : baseline 798,440/878,648 → current 798,518/878,658 (Δ +78 / +10) Branches : baseline 239,765/313,164 → current 239,896/313,374 (Δ +131 / +210)