PostgreSQL: comparación de cadena insensible a las mayúsculas

11

En primer lugar, lo que no se debe hacer: no utilice ILIKE ...

-- Demo table: a plain text email column carrying a UNIQUE constraint.
create table y 
(
id serial not null, 
email text not null unique 
); 

-- Two sample addresses, followed by 1000 filler rows (the numbers 1..1000
-- produced by generate_series, stored as text).
insert into y(email) 
values('[email protected]') ,('[email protected]'); 
insert into y(email) 
select n from generate_series(1,1000) as i(n); 
-- NOTE(review): the UNIQUE constraint on email already creates an index on
-- that column, so this additional index appears to be redundant.
create index ix_y on y(email); 

explain select * from y 
where email ilike 
    ANY(ARRAY['[email protected]','[email protected]']);

plan de ejecución:

memdb=# explain select * from y where email ilike ANY(ARRAY['[email protected]','[email protected]']); 
             QUERY PLAN          
---------------------------------------------------------------------------------------- 
Seq Scan on y (cost=0.00..17.52 rows=1 width=7) 
    Filter: (email ~~* ANY ('{[email protected],[email protected]}'::text[])) 
(2 rows)

O bien se crea un índice sobre la expresión lower() ...

-- Array overload of lower(): converts the whole array to its text form,
-- lower-cases that string, and casts the result back to text[].
CREATE FUNCTION lower(t text[]) RETURNS text[] 
LANGUAGE sql 
AS $body$ 
    SELECT CAST(lower(CAST($1 AS text)) AS text[]) 
$body$; 

create unique index ix_y_2 on y(lower(email)); 

explain select * from y 
where lower(email) = 
    ANY(lower(ARRAY['[email protected]','[email protected]']));

... que utiliza correctamente el índice:

memdb=# explain select * from y where lower(email) = ANY(lower(ARRAY['[email protected]','[email protected]'])); 
                  QUERY PLAN               
-------------------------------------------------------------------------------------------------------------------------------- 
Bitmap Heap Scan on y (cost=22.60..27.98 rows=10 width=7) 
    Recheck Cond: (lower(email) = ANY ((lower(('{[email protected],[email protected]}'::text[])::text))::text[])) 
    -> Bitmap Index Scan on ix_y_2 (cost=0.00..22.60 rows=10 width=0) 
     Index Cond: (lower(email) = ANY ((lower(('{[email protected],[email protected]}'::text[])::text))::text[])) 
(4 rows)

O utiliza el tipo de datos Citext ...

create table x 
(
id serial not null, 
email citext not null unique 
); 

insert into x(email) 
values('[email protected]'),('[email protected]'); 
insert into x(email) 
select n from generate_series(1,1000) as i(n); 
create index ix_x on x(email); 

explain select * from x 
where email = 
ANY(ARRAY['[email protected]','[email protected]']::citext[]);

... que utiliza correctamente el índice aunque no se haya creado un índice sobre una expresión (por ejemplo, create index zzz on yyy(lower(campo))):

memdb=# explain select * from x where email = ANY(ARRAY['[email protected]','[email protected]']::citext[]); 
              QUERY PLAN            
-------------------------------------------------------------------------------------------------- 
Bitmap Heap Scan on x (cost=8.52..12.75 rows=2 width=7) 
    Recheck Cond: (email = ANY ('{[email protected],[email protected]}'::citext[])) 
    -> Bitmap Index Scan on ix_x (cost=0.00..8.52 rows=2 width=0) 
     Index Cond: (email = ANY ('{[email protected],[email protected]}'::citext[])) 
(4 rows)

Fuente

2013-05-07 11:49:21

+0

Tenga en cuenta que _sí_ se puede hacer que 'ILIKE' utilice un índice cuando se usa un índice de trigramas: https://www.postgresql.org/docs/current/static/pgtrgm.html (aunque el índice B-Tree será más rápido de actualizar y también más pequeño) –

+0

Tampoco es necesario crear un índice sobre 'email' si lo declara como 'unique': eso ya habrá creado un índice sobre esa columna. –

9

Utilice el tipo de datos de texto insensible a las mayúsculas y minúsculas. Uso Citext:

-- Email addresses per user. The email column uses citext so that
-- comparisons against it are case-insensitive.
-- Depends on the citext type and on an existing users(user_id) table.
create table emails 
(
user_id int references users(user_id), 
email citext 
); 

insert into emails(user_id, email) values(1, '[email protected]'); 
insert into emails(user_id, email) values(2, '[email protected]'); 

select * from emails where email in ('[email protected]','[email protected]');

En caso de que no pueda encontrar citext.sql en su directorio contrib, copie y pegue esto en pgAdmin:

/* $PostgreSQL: pgsql/contrib/citext/citext.sql.in,v 1.3 2008/09/05 18:25:16 tgl Exp $ */ 

-- Adjust this setting to control where the objects get created. 
SET search_path = public; 

-- 
-- PostgreSQL code for CITEXT. 
-- 
-- Most I/O functions, and a few others, piggyback on the "text" type 
-- functions via the implicit cast to text. 
-- 

-- 
-- Shell type to keep things a bit quieter. 
-- 

CREATE TYPE citext; 

-- 
-- Input and output functions. 
-- 
CREATE OR REPLACE FUNCTION citextin(cstring) 
RETURNS citext 
AS 'textin' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citextout(citext) 
RETURNS cstring 
AS 'textout' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citextrecv(internal) 
RETURNS citext 
AS 'textrecv' 
LANGUAGE internal STABLE STRICT; 

CREATE OR REPLACE FUNCTION citextsend(citext) 
RETURNS bytea 
AS 'textsend' 
LANGUAGE internal STABLE STRICT; 

-- 
-- The type itself. 
-- 

CREATE TYPE citext (
    INPUT   = citextin, 
    OUTPUT   = citextout, 
    RECEIVE  = citextrecv, 
    SEND   = citextsend, 
    INTERNALLENGTH = VARIABLE, 
    STORAGE  = extended, 
    -- make it a non-preferred member of string type category 
    CATEGORY  = 'S', 
    PREFERRED  = false 
); 

-- 
-- Type casting functions for those situations where the I/O casts don't 
-- automatically kick in. 
-- 

CREATE OR REPLACE FUNCTION citext(bpchar) 
RETURNS citext 
AS 'rtrim1' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citext(boolean) 
RETURNS citext 
AS 'booltext' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citext(inet) 
RETURNS citext 
AS 'network_show' 
LANGUAGE internal IMMUTABLE STRICT; 

-- 
-- Implicit and assignment type casts. 
-- 

CREATE CAST (citext AS text) WITHOUT FUNCTION AS IMPLICIT; 
CREATE CAST (citext AS varchar) WITHOUT FUNCTION AS IMPLICIT; 
CREATE CAST (citext AS bpchar) WITHOUT FUNCTION AS ASSIGNMENT; 
CREATE CAST (text AS citext) WITHOUT FUNCTION AS ASSIGNMENT; 
CREATE CAST (varchar AS citext) WITHOUT FUNCTION AS ASSIGNMENT; 
CREATE CAST (bpchar AS citext) WITH FUNCTION citext(bpchar) AS ASSIGNMENT; 
CREATE CAST (boolean AS citext) WITH FUNCTION citext(boolean) AS ASSIGNMENT; 
CREATE CAST (inet AS citext) WITH FUNCTION citext(inet) AS ASSIGNMENT; 

-- 
-- Operator Functions. 
-- 

CREATE OR REPLACE FUNCTION citext_eq(citext, citext) 
RETURNS bool 
AS '$libdir/citext' 
LANGUAGE C IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citext_ne(citext, citext) 
RETURNS bool 
AS '$libdir/citext' 
LANGUAGE C IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citext_lt(citext, citext) 
RETURNS bool 
AS '$libdir/citext' 
LANGUAGE C IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citext_le(citext, citext) 
RETURNS bool 
AS '$libdir/citext' 
LANGUAGE C IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citext_gt(citext, citext) 
RETURNS bool 
AS '$libdir/citext' 
LANGUAGE C IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citext_ge(citext, citext) 
RETURNS bool 
AS '$libdir/citext' 
LANGUAGE C IMMUTABLE STRICT; 

-- 
-- Operators. 
-- 

CREATE OPERATOR = (
    LEFTARG = CITEXT, 
    RIGHTARG = CITEXT, 
    COMMUTATOR = =, 
    NEGATOR = <>, 
    PROCEDURE = citext_eq, 
    RESTRICT = eqsel, 
    JOIN  = eqjoinsel, 
    HASHES, 
    MERGES 
); 

CREATE OPERATOR <> (
    LEFTARG = CITEXT, 
    RIGHTARG = CITEXT, 
    NEGATOR = =, 
    COMMUTATOR = <>, 
    PROCEDURE = citext_ne, 
    RESTRICT = neqsel, 
    JOIN  = neqjoinsel 
); 

CREATE OPERATOR < (
    LEFTARG = CITEXT, 
    RIGHTARG = CITEXT, 
    NEGATOR = >=, 
    COMMUTATOR = >, 
    PROCEDURE = citext_lt, 
    RESTRICT = scalarltsel, 
    JOIN  = scalarltjoinsel 
); 

CREATE OPERATOR <= (
    LEFTARG = CITEXT, 
    RIGHTARG = CITEXT, 
    NEGATOR = >, 
    COMMUTATOR = >=, 
    PROCEDURE = citext_le, 
    RESTRICT = scalarltsel, 
    JOIN  = scalarltjoinsel 
); 

CREATE OPERATOR >= (
    LEFTARG = CITEXT, 
    RIGHTARG = CITEXT, 
    NEGATOR = <, 
    COMMUTATOR = <=, 
    PROCEDURE = citext_ge, 
    RESTRICT = scalargtsel, 
    JOIN  = scalargtjoinsel 
); 

CREATE OPERATOR > (
    LEFTARG = CITEXT, 
    RIGHTARG = CITEXT, 
    NEGATOR = <=, 
    COMMUTATOR = <, 
    PROCEDURE = citext_gt, 
    RESTRICT = scalargtsel, 
    JOIN  = scalargtjoinsel 
); 

-- 
-- Support functions for indexing. 
-- 

CREATE OR REPLACE FUNCTION citext_cmp(citext, citext) 
RETURNS int4 
AS '$libdir/citext' 
LANGUAGE C STRICT IMMUTABLE; 

CREATE OR REPLACE FUNCTION citext_hash(citext) 
RETURNS int4 
AS '$libdir/citext' 
LANGUAGE C STRICT IMMUTABLE; 

-- 
-- The btree indexing operator class. 
-- 

CREATE OPERATOR CLASS citext_ops 
DEFAULT FOR TYPE CITEXT USING btree AS 
    OPERATOR 1 < (citext, citext), 
    OPERATOR 2 <= (citext, citext), 
    OPERATOR 3 = (citext, citext), 
    OPERATOR 4 >= (citext, citext), 
    OPERATOR 5 > (citext, citext), 
    FUNCTION 1 citext_cmp(citext, citext); 

-- 
-- The hash indexing operator class. 
-- 

CREATE OPERATOR CLASS citext_ops 
DEFAULT FOR TYPE citext USING hash AS 
    OPERATOR 1 = (citext, citext), 
    FUNCTION 1 citext_hash(citext); 

-- 
-- Aggregates. 
-- 

-- Two-argument citext function implemented in the citext shared library;
-- used as the SFUNC of the min(citext) aggregate.
CREATE OR REPLACE FUNCTION citext_smaller(citext, citext) 
RETURNS citext 
AS '$libdir/citext' 
LANGUAGE C IMMUTABLE STRICT; 

-- Two-argument citext function implemented in the citext shared library;
-- used as the SFUNC of the max(citext) aggregate.
CREATE OR REPLACE FUNCTION citext_larger(citext, citext) 
RETURNS citext 
AS '$libdir/citext' 
LANGUAGE C IMMUTABLE STRICT; 

CREATE AGGREGATE min(citext) (
    SFUNC = citext_smaller, 
    STYPE = citext, 
    SORTOP = < 
); 

CREATE AGGREGATE max(citext) (
    SFUNC = citext_larger, 
    STYPE = citext, 
    SORTOP = > 
); 

-- 
-- CITEXT pattern matching. 
-- 

CREATE OR REPLACE FUNCTION texticlike(citext, citext) 
RETURNS bool AS 'texticlike' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION texticnlike(citext, citext) 
RETURNS bool AS 'texticnlike' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION texticregexeq(citext, citext) 
RETURNS bool AS 'texticregexeq' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION texticregexne(citext, citext) 
RETURNS bool AS 'texticregexne' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OPERATOR ~ (
    PROCEDURE = texticregexeq, 
    LEFTARG = citext, 
    RIGHTARG = citext, 
    NEGATOR = !~, 
    RESTRICT = icregexeqsel, 
    JOIN  = icregexeqjoinsel 
); 

CREATE OPERATOR ~* (
    PROCEDURE = texticregexeq, 
    LEFTARG = citext, 
    RIGHTARG = citext, 
    NEGATOR = !~*, 
    RESTRICT = icregexeqsel, 
    JOIN  = icregexeqjoinsel 
); 

CREATE OPERATOR !~ (
    PROCEDURE = texticregexne, 
    LEFTARG = citext, 
    RIGHTARG = citext, 
    NEGATOR = ~, 
    RESTRICT = icregexnesel, 
    JOIN  = icregexnejoinsel 
); 

CREATE OPERATOR !~* (
    PROCEDURE = texticregexne, 
    LEFTARG = citext, 
    RIGHTARG = citext, 
    NEGATOR = ~*, 
    RESTRICT = icregexnesel, 
    JOIN  = icregexnejoinsel 
); 

CREATE OPERATOR ~~ (
    PROCEDURE = texticlike, 
    LEFTARG = citext, 
    RIGHTARG = citext, 
    NEGATOR = !~~, 
    RESTRICT = iclikesel, 
    JOIN  = iclikejoinsel 
); 

CREATE OPERATOR ~~* (
    PROCEDURE = texticlike, 
    LEFTARG = citext, 
    RIGHTARG = citext, 
    NEGATOR = !~~*, 
    RESTRICT = iclikesel, 
    JOIN  = iclikejoinsel 
); 

CREATE OPERATOR !~~ (
    PROCEDURE = texticnlike, 
    LEFTARG = citext, 
    RIGHTARG = citext, 
    NEGATOR = ~~, 
    RESTRICT = icnlikesel, 
    JOIN  = icnlikejoinsel 
); 

CREATE OPERATOR !~~* (
    PROCEDURE = texticnlike, 
    LEFTARG = citext, 
    RIGHTARG = citext, 
    NEGATOR = ~~*, 
    RESTRICT = icnlikesel, 
    JOIN  = icnlikejoinsel 
); 

-- 
-- Matching citext to text. 
-- 

CREATE OR REPLACE FUNCTION texticlike(citext, text) 
RETURNS bool AS 'texticlike' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION texticnlike(citext, text) 
RETURNS bool AS 'texticnlike' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION texticregexeq(citext, text) 
RETURNS bool AS 'texticregexeq' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION texticregexne(citext, text) 
RETURNS bool AS 'texticregexne' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OPERATOR ~ (
    PROCEDURE = texticregexeq, 
    LEFTARG = citext, 
    RIGHTARG = text, 
    NEGATOR = !~, 
    RESTRICT = icregexeqsel, 
    JOIN  = icregexeqjoinsel 
); 

CREATE OPERATOR ~* (
    PROCEDURE = texticregexeq, 
    LEFTARG = citext, 
    RIGHTARG = text, 
    NEGATOR = !~*, 
    RESTRICT = icregexeqsel, 
    JOIN  = icregexeqjoinsel 
); 

CREATE OPERATOR !~ (
    PROCEDURE = texticregexne, 
    LEFTARG = citext, 
    RIGHTARG = text, 
    NEGATOR = ~, 
    RESTRICT = icregexnesel, 
    JOIN  = icregexnejoinsel 
); 

CREATE OPERATOR !~* (
    PROCEDURE = texticregexne, 
    LEFTARG = citext, 
    RIGHTARG = text, 
    NEGATOR = ~*, 
    RESTRICT = icregexnesel, 
    JOIN  = icregexnejoinsel 
); 

CREATE OPERATOR ~~ (
    PROCEDURE = texticlike, 
    LEFTARG = citext, 
    RIGHTARG = text, 
    NEGATOR = !~~, 
    RESTRICT = iclikesel, 
    JOIN  = iclikejoinsel 
); 

CREATE OPERATOR ~~* (
    PROCEDURE = texticlike, 
    LEFTARG = citext, 
    RIGHTARG = text, 
    NEGATOR = !~~*, 
    RESTRICT = iclikesel, 
    JOIN  = iclikejoinsel 
); 

CREATE OPERATOR !~~ (
    PROCEDURE = texticnlike, 
    LEFTARG = citext, 
    RIGHTARG = text, 
    NEGATOR = ~~, 
    RESTRICT = icnlikesel, 
    JOIN  = icnlikejoinsel 
); 

CREATE OPERATOR !~~* (
    PROCEDURE = texticnlike, 
    LEFTARG = citext, 
    RIGHTARG = text, 
    NEGATOR = ~~*, 
    RESTRICT = icnlikesel, 
    JOIN  = icnlikejoinsel 
); 

-- 
-- Matching citext in string comparison functions. 
-- XXX TODO Ideally these would be implemented in C. 
-- 

CREATE OR REPLACE FUNCTION regexp_matches(citext, citext) RETURNS TEXT[] AS $$ 
    SELECT pg_catalog.regexp_matches($1::pg_catalog.text, $2::pg_catalog.text, 'i'); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION regexp_matches(citext, citext, text) RETURNS TEXT[] AS $$ 
    SELECT pg_catalog.regexp_matches($1::pg_catalog.text, $2::pg_catalog.text, CASE WHEN pg_catalog.strpos($3, 'c') = 0 THEN $3 || 'i' ELSE $3 END); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION regexp_replace(citext, citext, text) returns TEXT AS $$ 
    SELECT pg_catalog.regexp_replace($1::pg_catalog.text, $2::pg_catalog.text, $3, 'i'); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION regexp_replace(citext, citext, text, text) returns TEXT AS $$ 
    SELECT pg_catalog.regexp_replace($1::pg_catalog.text, $2::pg_catalog.text, $3, CASE WHEN pg_catalog.strpos($4, 'c') = 0 THEN $4 || 'i' ELSE $4 END); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION regexp_split_to_array(citext, citext) RETURNS TEXT[] AS $$ 
    SELECT pg_catalog.regexp_split_to_array($1::pg_catalog.text, $2::pg_catalog.text, 'i'); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION regexp_split_to_array(citext, citext, text) RETURNS TEXT[] AS $$ 
    SELECT pg_catalog.regexp_split_to_array($1::pg_catalog.text, $2::pg_catalog.text, CASE WHEN pg_catalog.strpos($3, 'c') = 0 THEN $3 || 'i' ELSE $3 END); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION regexp_split_to_table(citext, citext) RETURNS SETOF TEXT AS $$ 
    SELECT pg_catalog.regexp_split_to_table($1::pg_catalog.text, $2::pg_catalog.text, 'i'); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION regexp_split_to_table(citext, citext, text) RETURNS SETOF TEXT AS $$ 
    SELECT pg_catalog.regexp_split_to_table($1::pg_catalog.text, $2::pg_catalog.text, CASE WHEN pg_catalog.strpos($3, 'c') = 0 THEN $3 || 'i' ELSE $3 END); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION strpos(citext, citext) RETURNS INT AS $$ 
    SELECT pg_catalog.strpos(pg_catalog.lower($1::pg_catalog.text), pg_catalog.lower($2::pg_catalog.text)); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION replace(citext, citext, citext) RETURNS TEXT AS $$ 
    SELECT pg_catalog.regexp_replace($1::pg_catalog.text, pg_catalog.regexp_replace($2::pg_catalog.text, '([^a-zA-Z_0-9])', E'\\\\\\1', 'g'), $3::pg_catalog.text, 'gi'); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION split_part(citext, citext, int) RETURNS TEXT AS $$ 
    SELECT (pg_catalog.regexp_split_to_array($1::pg_catalog.text, pg_catalog.regexp_replace($2::pg_catalog.text, '([^a-zA-Z_0-9])', E'\\\\\\1', 'g'), 'i'))[$3]; 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION translate(citext, citext, text) RETURNS TEXT AS $$ 
    SELECT pg_catalog.translate(pg_catalog.translate($1::pg_catalog.text, pg_catalog.lower($2::pg_catalog.text), $3), pg_catalog.upper($2::pg_catalog.text), $3); 
$$ LANGUAGE SQL IMMUTABLE STRICT;

Fuente

2010-12-19 09:13:55

+2

'create extension citext;' instalará el módulo –

38

select * 
where email ilike '[email protected]'

ilike es similar a like, pero no distingue entre mayúsculas y minúsculas. Para escapar los caracteres comodín, utilice replace():

where email ilike replace(replace(replace($1, '~', '~~'), '%', '~%'), '_', '~_') escape '~'

o se puede crear una función para escapar el texto; para un array de textos use

where email ilike any(array['[email protected]', '[email protected]'])

Fuente

2010-12-19 09:32:15 Bonshington

+0

+1 el operador 'any' es justo lo que estaba buscando. ¡Gracias! –

+3

'LIKE' e 'ILIKE' son bastante diferentes de la igualdad de cadenas, y la magia con 'replace' necesaria para neutralizar los metacaracteres es mucho peor que las llamadas a 'lower' originales. Aunque 'ILIKE', sin molestarse en tener en cuenta los metacaracteres, a menudo funcionará como solución rápida y sucia para un uso puntual, no lo recomendaría como comparación general de cadenas insensible a mayúsculas y minúsculas. – Ben

+0

@Bonshington Me encanta la idea de 'ILike': después de todos estos años, nunca lo supe. ¿Pero sabes si esto funciona para cualquier idioma o solo para inglés y el conjunto latino? ¡Gracias! +1 por su respuesta anterior. – itsols

3

También puede crear un índice sobre lower(email).

Fuente

2010-12-19 11:41:32 peufeu

+1

Eso, sin embargo, frustraría un poco el propósito de la pregunta: supongo que el autor de la pregunta no quiere molestarse en usar lower :-) Algunas razones para usar citext: http://www.depesz.com/index.php/2008/08/10/waiting-for-84-case-insensitive-text-citext/ –

-3

Use ‘Collate SQL_Latin1_General_CP1_CS_AS’ for it. 
-- T-SQL (SQL Server) example; this is not valid PostgreSQL.
declare @a nvarchar(5)='a' 
declare @b nvarchar(5)='A' 

-- Compare the two variables under an explicit collation.
-- NOTE(review): the _CS_ part of this collation name means case-sensitive,
-- so 'a' vs 'A' takes the else branch here; a _CI_ collation would be needed
-- for a case-insensitive match.
if(@a=@b Collate SQL_Latin1_General_CP1_CS_AS) 
begin 
print 'Match' 
end 
else 
begin 
print 'Not Matched' 
end

Fuente

2016-01-27 05:31:58

+0

El OP pregunta por PostgreSQL, no SQL Server. – NathanAldenSr

6

Las cosas han cambiado en los 4 años transcurridos desde que se respondió esta pregunta, y la recomendación "no utilice ILIKE" ya no es cierta (al menos no de manera general).

De hecho, dependiendo de la distribución de datos, ILIKE con trigram index podría ser incluso más rápido que citext.

Para un índice único sí hay, en efecto, una gran diferencia, que se puede ver utilizando la configuración de prueba de Michael:

create table y 
(
    id serial not null, 
    email text not null unique 
); 

insert into y(email) 
select 'some.name'||n||'@foobar.com' 
from generate_series(1,100000) as i(n); 

-- create a trigram index to support ILIKE  
create index ix_y on y using gin (email gin_trgm_ops); 

create table x 
(
    id serial not null, 
    email citext not null unique 
); 
-- no need to create an index 
-- the UNIQUE constraint will create a regular B-Tree index 

insert into x(email) 
select email 
from y;

El plan de ejecución para el uso de ILIKE:

explain (analyze) 
select * 
from y 
where email ilike ANY (ARRAY['[email protected]','[email protected]']);

Bitmap Heap Scan on y (cost=126.07..154.50 rows=20 width=29) (actual time=60.696..60.818 rows=2 loops=1) 
    Recheck Cond: (email ~~* ANY ('{[email protected],[email protected]}'::text[])) 
    Rows Removed by Index Recheck: 13 
    Heap Blocks: exact=11 
    -> Bitmap Index Scan on ix_y (cost=0.00..126.07 rows=20 width=0) (actual time=60.661..60.661 rows=15 loops=1) 
     Index Cond: (email ~~* ANY ('{[email protected],[email protected]}'::text[])) 
Planning time: 0.952 ms 
Execution time: 61.004 ms

Y para usar citext:

explain (analyze) 
select * 
from x 
where email = ANY (ARRAY['[email protected]','[email protected]']);

Index Scan using x_email_key on x (cost=0.42..5.85 rows=2 width=29) (actual time=0.111..0.203 rows=2 loops=1) 
    Index Cond: (email = ANY ('{[email protected],[email protected]}'::citext[])) 
Planning time: 0.115 ms 
Execution time: 0.254 ms

Tenga en cuenta que la consulta con ILIKE es, de hecho, algo distinto de la consulta con = para citext, ya que ILIKE respetaría los comodines.

Sin embargo, para un índice no único las cosas se ven diferentes. La siguiente configuración está basada en una recent question haciendo la misma:

create table data 
(
    group_id serial primary key, 
    name text 
); 

create table data_ci 
(
    group_id serial primary key, 
    name citext 
); 

insert into data(name) 
select 'data'||i.n 
from generate_series(1,1000) as i(n), generate_series(1,1000) as i2(n); 

insert into data_ci(group_id, name) 
select group_id, name 
from data; 

create index ix_data_gin on data using gin (name public.gin_trgm_ops); 
create index ix_data_ci on data_ci (name);

Así que tenemos un millón de filas en cada tabla y unos 1000 valores distintos para la columna name, y para cada valor distinto tenemos 1000 duplicados. Una consulta que busque 3 valores diferentes devolverá 3000 filas.

En este caso, el índice trigrama es sustancialmente más rápido que el índice BTree:

explain (analyze) 
select * 
from data 
where name ilike any (array['Data1', 'data2', 'DATA3']);

Bitmap Heap Scan on data (cost=88.25..1777.61 rows=1535 width=11) (actual time=2.906..11.064 rows=3000 loops=1) 
    Recheck Cond: (name ~~* ANY ('{Data1,data2,DATA3}'::text[])) 
    Heap Blocks: exact=17 
    -> Bitmap Index Scan on ix_data_gin (cost=0.00..87.87 rows=1535 width=0) (actual time=2.869..2.869 rows=3000 loops=1) 
     Index Cond: (name ~~* ANY ('{Data1,data2,DATA3}'::text[])) 
Planning time: 2.174 ms 
Execution time: 11.282 ms

y el índice B-Tree sobre la columna citext ya no se utiliza; ahora se realiza un Seq Scan:

explain analyze 
select * 
from data_ci 
where name = any (array['Data1', 'data2', 'DATA3']);

Seq Scan on data_ci (cost=0.00..10156.00 rows=2904 width=11) (actual time=0.449..304.301 rows=1000 loops=1) 
    Filter: ((name)::text = ANY ('{Data1,data2,DATA3}'::text[])) 
    Rows Removed by Filter: 999000 
Planning time: 0.152 ms 
Execution time: 304.360 ms

También el tamaño del índice GIN es en realidad más pequeño que el de la columna citext:

select pg_size_pretty(pg_total_relation_size('ix_data_gin')) as gin_index_size, 
     pg_size_pretty(pg_total_relation_size('ix_data_ci')) as citex_index_size

gin_index_size | citex_index_size 
---------------+----------------- 
11 MB   | 21 MB

Lo anterior se realizó utilizando PostgreSQL 9.6.1 en un ordenador portátil con Windows y con random_page_cost establecido en 1,5.

Fuente

2017-01-17 08:14:37

PostgreSQL: comparación de cadena insensible a las mayúsculas

Respuesta

Cuestiones relacionadas