我有一个非常大的表,我想从最低的id开始循环并过滤,直到得到1000行,然后我想返回。
因此,我的函数getFilteredItems将进行一些筛选,然后返回1000行。它有一个latestItemId参数,因此它将在特定id之后占用1000行。然后,getJournalistTopicItems调用getFilteredItems,后者会过滤更多的内容。大多数情况下,此函数将过滤掉所有行,因此我希望循环遍历,直到所有过滤完成时,最终得到1000行。
我在sql和postgres方面是个超级新手,所以所有这些都可能是杀伤力太强或者完全错了,但这就是我目前所做的。
CREATE OR REPLACE FUNCTION getJournalistTopicItems() RETURNS TEXT AS $$
DECLARE
currentCount INTEGER = 0;
currentItemIds int[];
latestItem INTEGER = 0;
BEGIN
WHILE currentCount < 1000 LOOP
WITH fItems AS (
SELECT * FROM getFilteredItems(latestItem)
)
SELECT fi.itemId, ift.content
FROM fItems fi
INNER JOIN journalistitems ji ON ji.itemid = fi.itemid
INNER JOIN itemfulltexts ift ON ji.itemid = ift.itemid
INNER JOIN journalists j ON j.id = ji.journalistid
INNER JOIN contacts c ON c.journalistid = j.id
WHERE c.isprivate = FALSE
AND c.disabled = FALSE
AND c.verificationstatus = 1;
currentItemIds := array_append(fi.itemId);
currentCount := array_length(currentItemIds);
SELECT latestItemId INTO latestItem FROM getFilteredItems(latestItem) limit 1;
END LOOP;
RETURN array_to_string(currentItemIds);
END;
$$ LANGUAGE plpgsql;
函数getFilteredItems如下所示:
CREATE OR REPLACE FUNCTION getFilteredItems(latestId INTEGER) RETURNS TABLE(
latestItemId integer,
itemid integer
) AS $$
BEGIN
RETURN QUERY
WITH fItems AS(select i.id
from items i
inner join articlemetadatas am on am.itemid = i.id
where NOT EXISTS (SELECT 1 FROM journalistitemcategories jic WHERE jic.itemid = i.id)
AND NOT EXISTS (SELECT 1 FROM topicitemwords tw WHERE tw.itemid = i.id)
AND i.languagecode in ('sv')
AND am.contentprotected != 0
AND i.id > latestId
limit 1000)
SELECT (SELECT id FROM fItems order by id desc limit 1), fItems.id FROM fItems;
END;
$$ LANGUAGE plpgsql;
调用getJournalistTopicItems时出现的错误是:
ERROR: query has no destination for result data
HINT: If you want to discard the results of a SELECT, use PERFORM instead.
CONTEXT: PL/pgSQL function getjournalisttopicitems() line 8 at SQL statement
SQL state: 42601
最新更新:
CREATE OR REPLACE FUNCTION getJournalistTopicItems() RETURNS SETOF TEXT AS $func$
DECLARE
idarr int[];
idmed int[];
latestItem INTEGER = 0;
currentCount INTEGER = 0;
BEGIN
WHILE currentCount < 1000 LOOP
WITH fItems AS (
SELECT * FROM getFilteredItems(latestItem)
)
SELECT fi.itemId, ift.content
FROM fItems fi
INNER JOIN journalistitems ji ON ji.itemid = fi.itemid
INNER JOIN itemfulltexts ift ON ji.itemid = ift.itemid
INNER JOIN journalists j ON j.id = ji.journalistid
INNER JOIN contacts c ON c.journalistid = j.id
WHERE c.isprivate = FALSE
AND c.disabled = FALSE
AND c.verificationstatus = 1;
idmed := (select array_agg(itemId) from fItems);
idarr := (select idarr || idmed);
currentCount := (select array_length(currentCount, int));
latestItem := (select latestItem FROM getFilteredItems(latestItem) limit 1);
END LOOP;
RETURN QUERY SELECT array_to_string(idarr, ',');
END;
$func$ language plpgsql;