BUG #16040: PL/PGSQL RETURN QUERY statement never uses a parallel plan

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

BUG #16040: PL/PGSQL RETURN QUERY statement never uses a parallel plan

PG Bug reporting form
The following bug has been logged on the website:

Bug reference:      16040
Logged by:          Jeremy Smith
Email address:      [hidden email]
PostgreSQL version: 12.0
Operating system:   Official Docker Image, CentOS7
Description:        

I have also tried this with 11.3, 11.4, and 11.5, so this is not new in
12.0.  Here's a really basic way to reproduce this:

postgres=# BEGIN;
BEGIN
postgres=#
postgres=# -- Create a test table and some data
postgres=# CREATE TABLE test (a int);
CREATE TABLE
postgres=# INSERT INTO test SELECT generate_series(1,10);
INSERT 0 10
postgres=# alter table test set (parallel_workers = 4);
ALTER TABLE
postgres=# -- Use auto_explain to show plan of query in the function
postgres=# LOAD 'auto_explain';
LOAD
postgres=# SET auto_explain.log_analyze = on;
SET
postgres=# SET client_min_messages = log;
SET
postgres=# SET auto_explain.log_nested_statements = on;
SET
postgres=# SET auto_explain.log_min_duration = 0;
SET
postgres=# -- Set parallel costs artificially low, for demonstration
purposes
postgres=# set parallel_tuple_cost = 0;
SET
postgres=# set parallel_setup_cost = 0;
SET
postgres=# set max_parallel_workers_per_gather = 4;
SET
postgres=# -- Normal query will use 4 workers
postgres=# SELECT test.a, count(*) FROM test GROUP BY test.a;
LOG:  duration: 19.280 ms  plan:
Query Text: SELECT test.a, count(*) FROM test GROUP BY test.a;
Finalize HashAggregate  (cost=25.56..27.56 rows=200 width=12) (actual
time=16.649..16.795 rows=10 loops=1)
  Group Key: a
  ->  Gather  (cost=19.56..21.56 rows=800 width=12) (actual
time=2.853..18.744 rows=10 loops=1)
        Workers Planned: 4
        Workers Launched: 4
        ->  Partial HashAggregate  (cost=19.56..21.56 rows=200 width=12)
(actual time=0.493..0.519 rows=2 loops=5)
              Group Key: a
              ->  Parallel Seq Scan on test  (cost=0.00..16.38 rows=638
width=4) (actual time=0.009..0.083 rows=2 loops=5)
 a  | count
----+-------
  9 |     1
  3 |     1
  5 |     1
  4 |     1
 10 |     1
  6 |     1
  2 |     1
  7 |     1
  1 |     1
  8 |     1
(10 rows)

postgres=#
postgres=# CREATE OR REPLACE FUNCTION test_count()
postgres-#   RETURNS TABLE (a int, n bigint) AS
postgres-#   $$
postgres$#     BEGIN
postgres$#       RETURN QUERY SELECT test.a, count(*) FROM test GROUP BY
test.a;
postgres$#     END;
postgres$#   $$
postgres-# LANGUAGE PLPGSQL;
CREATE FUNCTION
postgres=#
postgres=# -- This query will not use parallel workers
postgres=# SELECT * FROM test_count();
LOG:  duration: 0.437 ms  plan:
Query Text: SELECT test.a, count(*) FROM test GROUP BY test.a
HashAggregate  (cost=48.25..50.25 rows=200 width=12) (actual
time=0.193..0.276 rows=10 loops=1)
  Group Key: a
  ->  Seq Scan on test  (cost=0.00..35.50 rows=2550 width=4) (actual
time=0.010..0.096 rows=10 loops=1)
LOG:  duration: 1.069 ms  plan:
Query Text: SELECT * FROM test_count();
Function Scan on test_count  (cost=0.25..10.25 rows=1000 width=12) (actual
time=0.895..0.968 rows=10 loops=1)
 a  | n
----+---
  9 | 1
  3 | 1
  5 | 1
  4 | 1
 10 | 1
  6 | 1
  2 | 1
  7 | 1
  1 | 1
  8 | 1
(10 rows)

postgres=# -- A workaround for long-running queries, using CREATE TABLE,
which will run in parallel
postgres=# CREATE OR REPLACE FUNCTION test_count2()
postgres-#   RETURNS TABLE (a int, n bigint) AS
postgres-#   $$
postgres$#     BEGIN
postgres$#       CREATE TEMPORARY TABLE test_count2_temp_table AS
postgres$#         SELECT test.a, count(*) FROM test GROUP BY test.a;
postgres$#       RETURN QUERY select * from test_count2_temp_table;
postgres$#     END;
postgres$#  $$
postgres-# LANGUAGE PLPGSQL;
CREATE FUNCTION
postgres=#
postgres=# -- The CREATE TABLE AS query will use parallel workers, but the
postgres=# -- RETURN QUERY statement will not
postgres=# SELECT * FROM test_count2();
LOG:  duration: 24.139 ms  plan:
Query Text: CREATE TEMPORARY TABLE test_count2_temp_table AS
        SELECT test.a, count(*) FROM test GROUP BY test.a
Finalize HashAggregate  (cost=25.56..27.56 rows=200 width=12) (actual
time=21.819..21.896 rows=10 loops=1)
  Group Key: a
  ->  Gather  (cost=19.56..21.56 rows=800 width=12) (actual
time=0.755..22.966 rows=10 loops=1)
        Workers Planned: 4
        Workers Launched: 4
        ->  Partial HashAggregate  (cost=19.56..21.56 rows=200 width=12)
(actual time=0.105..0.148 rows=2 loops=5)
              Group Key: a
              ->  Parallel Seq Scan on test  (cost=0.00..16.38 rows=638
width=4) (actual time=0.009..0.056 rows=2 loops=5)
LOG:  duration: 0.420 ms  plan:
Query Text: select * from test_count2_temp_table
Seq Scan on test_count2_temp_table  (cost=0.00..30.40 rows=2040 width=12)
(actual time=0.014..0.305 rows=10 loops=1)
LOG:  duration: 26.118 ms  plan:
Query Text: SELECT * FROM test_count2();
Function Scan on test_count2  (cost=0.25..10.25 rows=1000 width=12) (actual
time=25.845..25.994 rows=10 loops=1)
 a  | n
----+---
  9 | 1
  3 | 1
  5 | 1
  4 | 1
 10 | 1
  6 | 1
  2 | 1
  7 | 1
  1 | 1
  8 | 1
(10 rows)



It's not obvious from the documentation
(https://www.postgresql.org/docs/12/when-can-parallel-query-be-used.html)
that this should be the case.  RETURN QUERY is not interruptible, like a
cursor or for loop.