performance – Python Rabin-Karp Code Optimization

Here’s the Rabin-Karp implementation that I’ve written in Python:

from functools import lru_cache
from itertools import islice
from typing import List, Optional


BASE = 139
MOD = 2 ** 31 - 1


@lru_cache(maxsize=None)
def char_to_int(c: str) -> int:
    return ord(c) + 1

def calculate_hash(s: str, length: Optional(int) = None) -> int:
    ret_hash = 0
    if length is not None:
        s = islice(s, 0, length)
    power = len(s) - 1 if length is None else length - 1
    for i, c in enumerate(s):
        ret_hash += char_to_int(c) * BASE ** power
        power -= 1
    return ret_hash % MOD

def roll_hash(prev_hash: int, prev_char: int, next_char: str, length: int) -> int:
    new_hash = ((prev_hash - char_to_int(prev_char) * BASE ** (length - 1)) * BASE) % MOD
    new_hash += char_to_int(next_char)
    return new_hash % MOD

def rabin_karp(text: str, pattern: str) -> List(int):
    """
    Returns a list of indices where each entry corresponds to the starting index of a
    substring of `text` that exactly matches `pattern`
    """
    p_hash = calculate_hash(pattern)
    n = len(pattern)
    curr_hash = calculate_hash(text, n)
    indices = ()
    if p_hash == curr_hash:
        indices.append(0)
    for i in range(1, len(text) - n + 1):
        curr_hash = roll_hash(
            curr_hash, text(i - 1), text(i + n - 1), n
        )
        if p_hash == curr_hash:
            indices.append(i)
    return indices

if __name__ == "__main__":
    with open("lorem_ipsum.txt", "r") as f:
        text = f.read()
    pattern = "lorem"
    indices = rabin_karp(text, pattern)
    print(f"indices: {indices}")

I’m trying to optimize the code as much as possible, so I’ve tried to do some dynamic code analysis to better understand bottlenecks. I used cProfile to understand the function calls and made changes to the code accordingly to arrive at the above code. Here is the final output from cProfile:

Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    60301    0.091    0.000    0.091    0.000 rabin_karp.py:25(roll_hash)
        1    0.035    0.035    0.126    0.126 rabin_karp.py:30(rabin_karp)
       42    0.000    0.000    0.000    0.000 rabin_karp.py:11(char_to_int)
        2    0.000    0.000    0.000    0.000 rabin_karp.py:15(calculate_hash)
       57    0.000    0.000    0.000    0.000 {method 'append' of 'list' objects}
       42    0.000    0.000    0.000    0.000 {built-in method builtins.ord}
        3    0.000    0.000    0.000    0.000 {built-in method builtins.len}

Are there any other ways I could further optimize the code? Another interesting thing of note is that adding @lru_cache actually increases execution time as measured by timeit despite the caching mechanism reducing the number of functions calls to char_to_int() (from 120612 to 42).

performance – Monte Carlo Tree Search Optimization and Loss Prevention

I’m working on a implementation of Monte Carlo Tree Search in Swift.

It’s not bad, but it could be better! I’m principally interested in making my algorithm:

  1. faster (more iterations/second)
  2. prioritize moves that prevent instant losses (you’ll see…)

Here is the main driver:

final class MonteCarloTreeSearch {
    var player: Player
    var timeBudget: Double
    var maxDepth: Int
    var explorationConstant: Double
    var root: Node?
    var iterations: Int

    init(for player: Player, timeBudget: Double = 5, maxDepth: Int = 5, explorationConstant: Double = sqrt(2)) {
        self.player = player
        self.timeBudget = timeBudget
        self.maxDepth = maxDepth
        self.explorationConstant = explorationConstant
        self.iterations = 0
    }
    
    func update(with game: Game) {
        if let newRoot = findNode(for: game) {
            newRoot.parent = nil
            newRoot.move = nil
            root = newRoot
        } else {
            root = Node(game: game)
        }
    }

    func findMove(for game: Game? = nil) -> Move? {
        iterations = 0
        let start = CFAbsoluteTimeGetCurrent()
        if let game = game {
            update(with: game)
        }
        while CFAbsoluteTimeGetCurrent() - start < timeBudget {
            refine()
            iterations += 1
        }
        print("Iterations: (iterations)")
        return bestMove
    }
    
    private func refine() {
        let leafNode = root!.select(explorationConstant)
        let value = rollout(leafNode)
        leafNode.backpropogate(value)
    }
    
    private func rollout(_ node: Node) -> Double {
        var depth = 0
        var game = node.game
        while !game.isFinished {
            if depth >= maxDepth { break }
            guard let move = game.randomMove() else { break }
            game = game.update(move)
            depth += 1
        }
        let value = game.evaluate(for: player).value
        return value
    }
    
    private var bestMove: Move? {
        root?.selectChildWithMaxUcb(0)?.move
    }
    
    private func findNode(for game: Game) -> Node? {
        guard let root = root else { return nil }
        var queue = (root)
        while !queue.isEmpty {
            let head = queue.removeFirst()
            if head.game == game {
                return head
            }
            for child in head.children {
                queue.append(child)
            }
        }
        return nil
    }
}

I built this driver with a maxDepth argument because playouts/rollouts in my real game are fairly long and I have a access to a decent static evaluation function. Also, the BFS findNode method is so that I can reuse parts of the tree.

Here’s what a node in the driver looks like:

final class Node {
    weak var parent: Node?
    var move: Move?
    var game: Game
    var untriedMoves: (Move)
    var children: (Node)
    var cumulativeValueFor: Double
    var cumulativeValueAgainst: Double
    var visits: Double

    init(parent: Node? = nil, move: Move? = nil, game: Game) {
        self.parent = parent
        self.move = move
        self.game = game
        self.children = ()
        self.untriedMoves = game.availableMoves()
        self.cumulativeValueFor = 0
        self.cumulativeValueAgainst = 0
        self.visits = 0
    }
    
    var isFullyExpanded: Bool {
        untriedMoves.isEmpty
    }
    
    lazy var isTerminal: Bool = {
        game.isFinished
    }()
    
    func select(_ c: Double) -> Node {
        var leafNode = self
        while !leafNode.isTerminal {
            if !leafNode.isFullyExpanded {
                return leafNode.expand()
            } else {
                leafNode = leafNode.selectChildWithMaxUcb(c)!
            }
        }
        return leafNode
    }
    
    func expand() -> Node {
        let move = untriedMoves.popLast()!
        let nextGame = game.update(move)
        let childNode = Node(parent: self, move: move, game: nextGame)
        children.append(childNode)
        return childNode
    }
    
    func backpropogate(_ value: Double) {
        visits += 1
        cumulativeValueFor += value
        if let parent = parent {
            parent.backpropogate(value)
        }
    }
    
    func selectChildWithMaxUcb(_ c: Double) -> Node? {
        children.max { $0.ucb(c) < $1.ucb(c) }
    }

    func ucb(_ c: Double) -> Double {
        q + c * u
    }
    
    private var q: Double {
        let value = cumulativeValueFor - cumulativeValueAgainst
        return value / visits
    }
    
    private var u: Double {
        sqrt(log(parent!.visits) / visits)
    }
}

extension Node: CustomStringConvertible {
    var description: String {
        guard let move = move else { return "" }
        return "(move) ((cumulativeValueFor)/(visits))"
    }
}

I don’t think there’s anything extraordinary about my node object? (I am hoping, though, that I can do something to/about q so that I might prevent an “instant” loss in my test game…


I’ve been testing this implementation of MCTS on a 1-D variant of “Connect 4”.

Here’s the game and all of it’s primitives:

enum Player: Int {
    case one = 1
    case two = 2
    
    var opposite: Self {
        switch self {
        case .one: return .two
        case .two: return .one
        }
    }
}

extension Player: CustomStringConvertible {
    var description: String {
        "(rawValue)"
    }
}

typealias Move = Int

enum Evaluation {
    case win
    case loss
    case draw
    case ongoing(Double)
    
    var value: Double {
        switch self {
        case .win: return 1
        case .loss: return 0
        case .draw: return 0.5
        case .ongoing(let v): return v
        }
    }
}

struct Game {
    var array: Array<Int>
    var currentPlayer: Player
    
    init(length: Int = 10, currentPlayer: Player = .one) {
        self.array = Array.init(repeating: 0, count: length)
        self.currentPlayer = currentPlayer
    }
    
    var isFinished: Bool {
        switch evaluate() {
        case .ongoing: return false
        default: return true
        }
    }

    func availableMoves() -> (Move) {
        array
            .enumerated()
            .compactMap { $0.element == 0 ? Move($0.offset) : nil}
    }
    
    func update(_ move: Move) -> Self {
        var copy = self
        copy.array(move) = currentPlayer.rawValue
        copy.currentPlayer = currentPlayer.opposite
        return copy
    }
    
    func evaluate(for player: Player) -> Evaluation {
        let player3 = three(for: player)
        let oppo3 = three(for: player.opposite)
        let remaining0 = array.contains(0)
        switch (player3, oppo3, remaining0) {
        case (true, true, _): return .draw
        case (true, false, _): return .win
        case (false, true, _): return .loss
        case (false, false, false): return .draw
        default: return .ongoing(0.5)
        }
    }
    
    private func three(for player: Player) -> Bool {
        var count = 0
        for slot in array {
            if slot == player.rawValue {
                count += 1
            } else {
                count = 0
            }
            if count == 3 {
                return true
            }
        }
        return false
    }
}

extension Game {
    func evaluate() -> Evaluation {
        evaluate(for: currentPlayer)
    }
    
    func randomMove() -> Move? {
        availableMoves().randomElement()
    }
}

extension Game: CustomStringConvertible {
    var description: String {
        return array.reduce(into: "") { result, i in
            result += String(i)
        }
    }
}

extension Game: Equatable {}

While there are definitely efficiencies to be gained in optimizing the evaluate/three(for:) scoring methods, I’m more concerned about improving the performance of the driver and the node as this “1d-connect-3” game isn’t my real game. That said, if there’s a huge mistake here and a simple fix I’ll take it!

Another note: I am actually using ongoing(Double) in my real game (I’ve got a static evaluation function that can reliably score a player as 1-99% likely to win).


A bit of Playground code:


var mcts = MonteCarloTreeSearch(for: .two, timeBudget: 5, maxDepth: 3)
var game = Game(length: 10)
// 0000000000
game = game.update(0) // player 1
// 1000000000
game = game.update(8) // player 2
// 1000000020
game = game.update(1) // player 1
// 1100000020
let move1 = mcts.findMove(for: game)!
// usually 7 or 9... and not 2
print(mcts.root!.children)
game = game.update(move1) // player 2
mcts.update(with: game)
game = game.update(4) // player 1
mcts.update(with: game)
let move2 = mcts.findMove()!

Unfortunately, move1 in this sample “playthru” doesn’t try to prevent the instant win-condition on the next turn for player 1?! (I know that orthodox Monte Carlo Tree Search is in the business of maximizing winning not minimizing losing, but not picking 2 here is unfortunate).

So yeah, any help in making all this faster (perhaps through parallelization), and fixing the “instant-loss” business would be swell!

performance – Monitor disk activity by process and by file in linux like windows ressource monitor

I have multiple disks in my system sometimes I have a heavy read usage on my root partition. I know that there are some processes reading a lot of data, but from all disks. How can I find out which process is accessing which file?

I’ve tried htop, dstat or iotop. However these seem not to be able to show activity by process and file. In windows ressource monitor does this job perfect:

enter image description here

I can’t imagine that there isn’t a similar console tool for linux.

memory – DDR4 and LPDDR4X Performance

I am currently looking into buying a new machine and am confronted with the choice between DDR4 and LPDDR4X RAM. Online, I found various contradictory discussions on the trade-offs between the two types. LPDDR4X supposedly has a higher data transfer rate, but DDR4 apparently has a stronger busing technology. The machine I want to buy is a laptop that I use for data science computations, often focused on geo-spatial data, machine learning etc. that take a few hours or days at maximum CPU load. While outsourcing some computations to a cloud server, for convenience reasons, I still run many of them locally on the laptop. Therefore, I would like to buy the faster type of RAM. Which one performs better: DDR4 or LPDDR4X?

I checked all the Stack Exchange forums, and while Server Fault might not be a perfect match (question about a local computer rather than a server) for this question, it appeared to be the most appropriate one.

performance – How to minimize CLS (Cumulative Layout Shift) for responsive layouts?

What are some suggestions for reducing content layout shift for responsive layouts?

From researching https://web.dev/cls:
“Always include size attributes on your images and video elements, or otherwise reserve the required space with something like CSS aspect ratio boxes”.

But with responsive layouts, the containers for images and other content change.
So, do we need to reserve space for container divs, etc.?

How do you reserve space for these elements for responsive layouts?

mysql – Complex `where` dramatically affects query performance

There are much more complex queries running on these tables that are much faster but this one takes 90+ seconds to run.

What am I doing wrong here? How can this be improved?

SELECT 
            r.id as `ID`,  
            concat(r.fname, " ", r.lname) as `Applicant`,
            r.added `App Time`,
            concat(trim(r.city), ", ", r.state) as `City`, 
            coalesce(q.count, 0) as `Attempts`,
            coalesce(q.last_called, 0) as `Last Called`,
            null as `Removed`
        FROM myfreshp_crm.records r 
        left join (
            SELECT rid, count(rid) as count, max(called) as last_called
            from myfreshp_crm.cc_queue
            where status = 'called'
            group by rid
        ) q on q.rid = r.id 
        left join (
            select rid, max(time) as appt
            from myfreshp_crm.calendar 
            where event = 'Appointment' 
            group by rid
        ) a on a.rid = r.id
        left join (
            select rid, max(sent) as sent
            from myfreshp_crm.cc_queue
            group by rid
        ) c on c.rid = r.id
        where 
        r.id not in (select lead_id from asap_blacklist) 
        and coalesce(q.count, 0) < 4
        AND ( 
            c.sent > (UNIX_TIMESTAMP() - (60 * 60 * 24 * 28)) OR
            r.added > (UNIX_TIMESTAMP() - (60 * 60 * 24 * 28))
        )
        AND (
            (
                a.appt is not null
                and a.appt < UNIX_TIMESTAMP()
                and c.sent is not null
                and c.sent > a.appt
            ) OR (
                r.source = 'Online' 
                and a.appt is null
            )
        )

Explain extended.. is as follows:

+----+-------------+----------------+------+---------------+-------------+---------+-------------------+---------+----------+----------------------------------------------+
| id | select_type | table          | type | possible_keys | key         | key_len | ref               | rows    | filtered | Extra                                        |
+----+-------------+----------------+------+---------------+-------------+---------+-------------------+---------+----------+----------------------------------------------+
|  1 | PRIMARY     | r              | ALL  | added,source  | NULL        | NULL    | NULL              | 3436521 |   100.00 | Using where                                  |
|  1 | PRIMARY     | <derived2>     | ref  | <auto_key0>   | <auto_key0> | 4       | myfreshp_crm.r.id |      10 |   100.00 | Using where                                  |
|  1 | PRIMARY     | <derived3>     | ref  | <auto_key1>   | <auto_key1> | 4       | myfreshp_crm.r.id |      15 |   100.00 | Using where                                  |
|  1 | PRIMARY     | <derived4>     | ref  | <auto_key1>   | <auto_key1> | 4       | myfreshp_crm.r.id |      15 |   100.00 | Using where                                  |
|  5 | SUBQUERY    | asap_blacklist | ALL  | NULL          | NULL        | NULL    | NULL              |     287 |   100.00 | NULL                                         |
|  4 | DERIVED     | cc_queue       | ALL  | rid           | NULL        | NULL    | NULL              |   77090 |   100.00 | Using temporary; Using filesort              |
|  3 | DERIVED     | calendar       | ALL  | rid,event     | NULL        | NULL    | NULL              |  102750 |    97.15 | Using where; Using temporary; Using filesort |
|  2 | DERIVED     | cc_queue       | ALL  | rid,status    | NULL        | NULL    | NULL              |   77090 |    99.39 | Using where; Using temporary; Using filesort |
+----+-------------+----------------+------+---------------+-------------+---------+-------------------+---------+----------+----------------------------------------------+
8 rows in set, 1 warning (0.08 sec)

Show warnings; provides this:


| Level | Code | Message|

| Note  | 1003 | /* select#1 */ select `myfreshp_crm`.`r`.`id` AS `ID`,concat(`myfreshp_crm`.`r`.`fname`,' ',`myfreshp_crm`.`r`.`lname`) AS `Applicant`,`myfreshp_crm`.`r`.`added` AS `App Time`,concat(trim(`myfreshp_crm`.`r`.`city`),', ',`myfreshp_crm`.`r`.`state`) AS `City`,coalesce(`q`.`count`,0) AS `Attempts`,coalesce(`q`.`last_called`,0) AS `Last Called`,NULL AS `Removed` from `myfreshp_crm`.`records` `r` left join (/* select#2 */ select `myfreshp_crm`.`cc_queue`.`rid` AS `rid`,count(`myfreshp_crm`.`cc_queue`.`rid`) AS `count`,max(`myfreshp_crm`.`cc_queue`.`called`) AS `last_called` from `myfreshp_crm`.`cc_queue` where (`myfreshp_crm`.`cc_queue`.`status` = 'called') group by `myfreshp_crm`.`cc_queue`.`rid`) `q` on((`q`.`rid` = `myfreshp_crm`.`r`.`id`)) left join (/* select#3 */ select `myfreshp_crm`.`calendar`.`rid` AS `rid`,max(`myfreshp_crm`.`calendar`.`time`) AS `appt` from `myfreshp_crm`.`calendar` where (`myfreshp_crm`.`calendar`.`event` = 'Appointment') group by `myfreshp_crm`.`calendar`.`rid`) `a` on((`a`.`rid` = `myfreshp_crm`.`r`.`id`)) left join (/* select#4 */ select `myfreshp_crm`.`cc_queue`.`rid` AS `rid`,max(`myfreshp_crm`.`cc_queue`.`sent`) AS `sent` from `myfreshp_crm`.`cc_queue` group by `myfreshp_crm`.`cc_queue`.`rid`) `c` on((`c`.`rid` = `myfreshp_crm`.`r`.`id`)) where ((not(<in_optimizer>(`myfreshp_crm`.`r`.`id`,`myfreshp_crm`.`r`.`id` in ( <materialize> (/* select#5 */ select `myfreshp_crm`.`asap_blacklist`.`lead_id` from `myfreshp_crm`.`asap_blacklist` where 1 ), <primary_index_lookup>(`myfreshp_crm`.`r`.`id` in <temporary table> on <auto_key> where ((`myfreshp_crm`.`r`.`id` = `materialized-subquery`.`lead_id`))))))) and (coalesce(`q`.`count`,0) < 4) and ((`c`.`sent` > <cache>((unix_timestamp() - (((60 * 60) * 24) * 28)))) or (`myfreshp_crm`.`r`.`added` > <cache>((unix_timestamp() - (((60 * 60) * 24) * 28))))) and (((`a`.`appt` is not null) and (`a`.`appt` < <cache>(unix_timestamp())) and (`c`.`sent` is not null) and (`c`.`sent` > `a`.`appt`)) or ((`myfreshp_crm`.`r`.`source` = 'Online') and isnull(`a`.`appt`)))) |

1 row in set (0.00 sec)

The records table:

CREATE TABLE `records` (
  `id` int(20) NOT NULL AUTO_INCREMENT,
  `uid` int(20) NOT NULL,
  `cid` int(20) NOT NULL,
  `vid` int(8) NOT NULL,
  `added` int(25) NOT NULL,
  `fname` varchar(50) NOT NULL,
  `mname` varchar(50) NOT NULL,
  `lname` varchar(50) NOT NULL,
  `address` varchar(200) NOT NULL,
  `city` varchar(50) NOT NULL,
  `state` varchar(50) NOT NULL,
  `zip` int(5) NOT NULL,
  `phone1` varchar(16) NOT NULL,
  `phone2` varchar(16) NOT NULL,
  `mobilephone` varchar(16) NOT NULL,
  `email` varchar(100) NOT NULL,
  `status` enum('active','inactive','followup','responded','sold','dead') NOT NULL,
  `ssn` varchar(11) NOT NULL,
  `perm` enum('yes','no') NOT NULL DEFAULT 'no',
  `printed_label` int(30) NOT NULL,
  `printed_letter` int(30) NOT NULL,
  `dob` varchar(20) NOT NULL,
  `source` varchar(15) NOT NULL DEFAULT 'imported',
  `opt_out` enum('no','yes') NOT NULL DEFAULT 'no',
  `other_data` longtext NOT NULL,
  `sms_opt_in` int(11) NOT NULL,
  PRIMARY KEY (`id`),
  KEY `cid` (`cid`),
  KEY `uid` (`uid`),
  KEY `vid` (`vid`),
  KEY `status` (`status`),
  KEY `uid_2` (`uid`),
  KEY `printed_label` (`printed_label`),
  KEY `fname` (`fname`),
  KEY `mname` (`mname`),
  KEY `lname` (`lname`),
  KEY `phone1` (`phone1`),
  KEY `phone2` (`phone2`),
  KEY `printed_letter` (`printed_letter`),
  KEY `address` (`address`),
  KEY `city` (`city`),
  KEY `state` (`state`),
  KEY `added` (`added`),
  KEY `source` (`source`),
  KEY `email` (`email`),
  KEY `zip` (`zip`),
  KEY `ssn` (`ssn`),
  KEY `dob` (`dob`)
) ENGINE=InnoDB AUTO_INCREMENT=8938455 DEFAULT CHARSET=latin1

The cc_queue table:

CREATE TABLE `cc_queue` (
  `id` int(20) NOT NULL AUTO_INCREMENT,
  `rid` int(20) NOT NULL,
  `sent` int(30) NOT NULL,
  `called` int(30) NOT NULL,
  `reason` varchar(150) COLLATE utf8_unicode_ci NOT NULL,
  `status` enum('waiting','called') COLLATE utf8_unicode_ci NOT NULL,
  `disposition` longtext COLLATE utf8_unicode_ci NOT NULL,
  `comments` varchar(250) COLLATE utf8_unicode_ci NOT NULL,
  `sentToCC` int(11) NOT NULL,
  PRIMARY KEY (`id`),
  KEY `rid` (`rid`),
  KEY `status` (`status`),
  KEY `sent` (`sent`),
  KEY `called` (`called`),
  KEY `sentToCC` (`sentToCC`)
) ENGINE=MyISAM AUTO_INCREMENT=77097 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci

The calendar table:

CREATE TABLE `calendar` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `uid` int(11) NOT NULL,
  `rid` int(20) NOT NULL,
  `added` int(25) NOT NULL,
  `time` int(11) NOT NULL,
  `event` varchar(500) COLLATE utf8_unicode_ci NOT NULL,
  `details` varchar(1000) COLLATE utf8_unicode_ci NOT NULL,
  PRIMARY KEY (`id`),
  KEY `uid` (`uid`),
  KEY `rid` (`rid`),
  KEY `added` (`added`),
  KEY `time` (`time`),
  KEY `event` (`event`(333))
) ENGINE=MyISAM AUTO_INCREMENT=151930 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci

The asap_blacklist table:

CREATE TABLE `asap_blacklist` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `time` int(11) NOT NULL,
  `lead_id` int(11) NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1483 DEFAULT CHARSET=utf8 COLLATE=utf8_bin

And here’s what information_schema says…

select * from information_schema.tables where table_name in ('records', 'cc_queue', 'calendar', 'asap_blacklist');
+---------------+--------------+----------------+------------+--------+---------+------------+------------+----------------+-------------+-----------------+--------------+-----------+----------------+---------------------+---------------------+---------------------+-------------------+----------+----------------+---------------+
| TABLE_CATALOG | TABLE_SCHEMA | TABLE_NAME     | TABLE_TYPE | ENGINE | VERSION | ROW_FORMAT | TABLE_ROWS | AVG_ROW_LENGTH | DATA_LENGTH | MAX_DATA_LENGTH | INDEX_LENGTH | DATA_FREE | AUTO_INCREMENT | CREATE_TIME         | UPDATE_TIME         | CHECK_TIME          | TABLE_COLLATION   | CHECKSUM | CREATE_OPTIONS | TABLE_COMMENT |
+---------------+--------------+----------------+------------+--------+---------+------------+------------+----------------+-------------+-----------------+--------------+-----------+----------------+---------------------+---------------------+---------------------+-------------------+----------+----------------+---------------+
| def           | myfreshp_crm | asap_blacklist | BASE TABLE | InnoDB |      10 | Compact    |        287 |             57 |       16384 |               0 |            0 |         0 |           1483 | 2021-03-13 22:20:35 | NULL                | NULL                | utf8_bin          |     NULL |                |               |
| def           | myfreshp_crm | calendar       | BASE TABLE | MyISAM |      10 | Dynamic    |     102750 |            178 |    18325956 | 281474976710655 |      7480320 |         0 |         151930 | 2015-10-06 13:07:55 | 2021-05-04 21:38:09 | 2016-06-04 21:10:52 | utf8_unicode_ci   |     NULL |                |               |
| def           | myfreshp_crm | cc_queue       | BASE TABLE | MyISAM |      10 | Dynamic    |      77092 |            112 |    14584528 | 281474976710655 |      5064704 |   5935072 |          77097 | 2015-12-09 09:43:24 | 2021-05-05 09:30:02 | 2016-06-04 21:10:52 | utf8_unicode_ci   |     NULL |                |               |
| def           | myfreshp_crm | records        | BASE TABLE | InnoDB |      10 | Compact    |    3436523 |            204 |   702349312 |               0 |   1715929088 |   6291456 |        8938456 | 2021-02-18 04:16:51 | NULL                | NULL                | latin1_swedish_ci |     NULL |                |               |
+---------------+--------------+----------------+------------+--------+---------+------------+------------+----------------+-------------+-----------------+--------------+-----------+----------------+---------------------+---------------------+---------------------+-------------------+----------+----------------+---------------+
4 rows in set (0.00 sec)

Decent Shared Hosting with Low TTFB and good performance?

Hi,

I am looking for shared hosting with low TTFB (less than 200ms) and good performance – I test it with GTmetrix and tools.keycdn.com/p… | Read the rest of https://www.webhostingtalk.com/showthread.php?t=1845120&goto=newpost

mysql 5.7 – Performance issues from RDS Aurora 5.6 to Aurora 5.7

Recently upgraded one of my RDS instances to aurora mysql 5.7 from 5.6 and I’ve had to triple the instance class while I try to resolve this (cpu eventually pegs and never declines).

Most reads are slow now (some about 30% slower, others are up to 400% slower). Looking at explain output, the queries are still using proper indexes. innodb_buffer_pool_size us set to default RDS value (instance class*3/24 I think) and most other innodb_* settings are not modifiable.

Here’s an example Mysql5.6:

| count(*) |
+----------+
| 20646739 |
+----------+
1 row in set (0.06 sec)

Mysql5.7:

mysql> select count(*) from nope;                                                                                                                                                                    +----------+
| count(*) |
+----------+
| 20646739 |
+----------+
1 row in set (3.77 sec)

Anyone else encounter this that can provide some insight into some setting I can tweak?

performance – Parallel 3D Discrete Cosine Transformation Implementation in Matlab

I am trying to implement 3D Discrete Cosine Transformation calculation in Matlab with parallel computing parfor. The formula of 3D Discrete Cosine Transformation is as follows.

The experimental implementation

The experimental implementation of 3D Discrete Cosine Transformation function is DCT3D.

function X=DCT3D(x)

N1=size(x,1);
N2=size(x,2);
N3=size(x,3);
X=zeros(N1,N2,N3);

for k1=0:N1-1
    for k2=0:N2-1
        for k3=0:N3-1
            sumResult=0;            
            parfor n1=0:N1-1
                for n2=0:N2-1
                    for n3=0:N3-1
                        sumResult=sumResult+...
                            x(n1+1,n2+1,n3+1)*...
                            cos(pi/(2*N1)*(2*n1+1)*k1)*...
                            cos(pi/(2*N2)*(2*n2+1)*k2)*...
                            cos(pi/(2*N3)*(2*n3+1)*k3);
                    end
                end
            end
            X(k1+1,k2+1,k3+1)=8*sumResult*CalculateK(k1)*CalculateK(k2)*CalculateK(k3)/(N1*N2*N3);            
        end
    end
end

Moreover, the used function CalculateK:

function output = CalculateK(x)
  output = ones(size(x));
  output(x==0) = 1 / sqrt(2);

Test cases

%% Create test cells
testCellsSize = 10;

test = zeros(testCellsSize, testCellsSize, testCellsSize);
for x = 1:size(test, 1)
    for y = 1:size(test, 2)
        for z = 1:size(test, 3)
            test(x, y, z) = x * 100 + y * 10 + z;
        end
    end
end

%% Perform test
result = DCT3D(test);

%% Print output
for z = 1:size(result, 3)
    fprintf('3D DCT result: %d planen' , z);
    for x = 1:size(result, 1)
        for y = 1:size(result, 2)
            fprintf('%ft' , result(x, y, z));
        end
        fprintf('n');
    end
    fprintf('n');
end

The output of the test code above:

3D DCT result: 1 plane
1726.754760 -80.720722  -0.000000   -8.646042   -0.000000   -2.828427   0.000000    -1.143708   -0.000000   -0.320717   
-807.207224 0.000000    0.000000    -0.000000   0.000000    -0.000000   -0.000000   -0.000000   0.000000    -0.000000   
-0.000000   -0.000000   0.000000    0.000000    0.000000    -0.000000   0.000000    -0.000000   0.000000    -0.000000   
-86.460422  -0.000000   0.000000    -0.000000   -0.000000   -0.000000   0.000000    -0.000000   0.000000    -0.000000   
-0.000000   -0.000000   0.000000    0.000000    -0.000000   0.000000    -0.000000   -0.000000   -0.000000   -0.000000   
-28.284271  -0.000000   0.000000    -0.000000   -0.000000   0.000000    0.000000    -0.000000   -0.000000   0.000000    
0.000000    0.000000    -0.000000   0.000000    -0.000000   0.000000    0.000000    -0.000000   0.000000    0.000000    
-11.437076  -0.000000   0.000000    0.000000    -0.000000   0.000000    -0.000000   -0.000000   0.000000    -0.000000   
-0.000000   0.000000    0.000000    -0.000000   -0.000000   0.000000    0.000000    -0.000000   -0.000000   0.000000    
-3.207174   -0.000000   0.000000    -0.000000   0.000000    0.000000    -0.000000   -0.000000   0.000000    0.000000    

3D DCT result: 2 plane
-8.072072   0.000000    -0.000000   0.000000    -0.000000   0.000000    -0.000000   0.000000    0.000000    -0.000000   
-0.000000   -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   0.000000    -0.000000   
-0.000000   0.000000    -0.000000   0.000000    -0.000000   0.000000    0.000000    0.000000    -0.000000   0.000000    
-0.000000   -0.000000   -0.000000   0.000000    -0.000000   -0.000000   -0.000000   0.000000    0.000000    -0.000000   
-0.000000   -0.000000   0.000000    -0.000000   -0.000000   0.000000    -0.000000   0.000000    0.000000    -0.000000   
-0.000000   0.000000    -0.000000   0.000000    0.000000    0.000000    0.000000    -0.000000   -0.000000   -0.000000   
0.000000    0.000000    -0.000000   -0.000000   0.000000    -0.000000   0.000000    -0.000000   -0.000000   0.000000    
-0.000000   0.000000    0.000000    -0.000000   0.000000    0.000000    0.000000    -0.000000   -0.000000   -0.000000   
0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    -0.000000   -0.000000   0.000000    
0.000000    -0.000000   -0.000000   0.000000    0.000000    0.000000    -0.000000   0.000000    -0.000000   0.000000    

3D DCT result: 3 plane
-0.000000   0.000000    0.000000    -0.000000   -0.000000   0.000000    -0.000000   -0.000000   0.000000    0.000000    
0.000000    -0.000000   -0.000000   -0.000000   0.000000    0.000000    -0.000000   -0.000000   0.000000    0.000000    
-0.000000   -0.000000   0.000000    -0.000000   0.000000    0.000000    0.000000    0.000000    -0.000000   0.000000    
0.000000    -0.000000   0.000000    -0.000000   0.000000    0.000000    0.000000    0.000000    -0.000000   0.000000    
0.000000    0.000000    0.000000    -0.000000   -0.000000   -0.000000   0.000000    0.000000    -0.000000   -0.000000   
-0.000000   0.000000    0.000000    -0.000000   0.000000    0.000000    -0.000000   -0.000000   -0.000000   -0.000000   
-0.000000   -0.000000   -0.000000   0.000000    0.000000    0.000000    0.000000    -0.000000   -0.000000   0.000000    
0.000000    0.000000    -0.000000   -0.000000   0.000000    -0.000000   -0.000000   0.000000    0.000000    0.000000    
-0.000000   0.000000    0.000000    -0.000000   -0.000000   0.000000    -0.000000   -0.000000   0.000000    0.000000    
0.000000    0.000000    -0.000000   -0.000000   -0.000000   0.000000    0.000000    0.000000    0.000000    0.000000    

3D DCT result: 4 plane
-0.864604   -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   0.000000    0.000000    0.000000    -0.000000   
-0.000000   -0.000000   -0.000000   0.000000    0.000000    -0.000000   -0.000000   -0.000000   0.000000    -0.000000   
-0.000000   -0.000000   -0.000000   -0.000000   0.000000    0.000000    0.000000    0.000000    -0.000000   0.000000    
-0.000000   0.000000    0.000000    -0.000000   -0.000000   0.000000    0.000000    -0.000000   -0.000000   -0.000000   
-0.000000   0.000000    -0.000000   0.000000    0.000000    -0.000000   0.000000    0.000000    -0.000000   0.000000    
-0.000000   0.000000    -0.000000   0.000000    0.000000    -0.000000   0.000000    -0.000000   0.000000    -0.000000   
-0.000000   0.000000    -0.000000   0.000000    -0.000000   0.000000    -0.000000   0.000000    -0.000000   0.000000    
0.000000    -0.000000   0.000000    0.000000    -0.000000   -0.000000   -0.000000   0.000000    0.000000    0.000000    
-0.000000   -0.000000   -0.000000   -0.000000   -0.000000   0.000000    -0.000000   -0.000000   0.000000    0.000000    
0.000000    -0.000000   0.000000    -0.000000   -0.000000   -0.000000   -0.000000   0.000000    0.000000    -0.000000   

3D DCT result: 5 plane
-0.000000   0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    -0.000000   -0.000000   0.000000    
0.000000    -0.000000   -0.000000   -0.000000   -0.000000   0.000000    0.000000    0.000000    0.000000    0.000000    
-0.000000   0.000000    -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   0.000000    0.000000    
0.000000    -0.000000   -0.000000   -0.000000   0.000000    -0.000000   0.000000    0.000000    -0.000000   0.000000    
0.000000    0.000000    0.000000    -0.000000   -0.000000   0.000000    0.000000    0.000000    0.000000    -0.000000   
0.000000    -0.000000   -0.000000   -0.000000   0.000000    -0.000000   -0.000000   -0.000000   0.000000    0.000000    
0.000000    0.000000    -0.000000   -0.000000   0.000000    -0.000000   -0.000000   0.000000    0.000000    -0.000000   
-0.000000   -0.000000   0.000000    0.000000    -0.000000   0.000000    0.000000    0.000000    -0.000000   0.000000    
0.000000    0.000000    -0.000000   0.000000    0.000000    -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   
-0.000000   0.000000    0.000000    0.000000    -0.000000   -0.000000   0.000000    -0.000000   0.000000    -0.000000   

3D DCT result: 6 plane
-0.282843   -0.000000   -0.000000   -0.000000   0.000000    -0.000000   -0.000000   -0.000000   0.000000    -0.000000   
-0.000000   0.000000    0.000000    0.000000    -0.000000   -0.000000   -0.000000   0.000000    0.000000    0.000000    
-0.000000   0.000000    0.000000    -0.000000   0.000000    -0.000000   -0.000000   -0.000000   0.000000    -0.000000   
-0.000000   -0.000000   0.000000    -0.000000   -0.000000   -0.000000   -0.000000   0.000000    0.000000    0.000000    
-0.000000   0.000000    0.000000    0.000000    -0.000000   0.000000    -0.000000   -0.000000   -0.000000   0.000000    
-0.000000   0.000000    -0.000000   -0.000000   -0.000000   0.000000    0.000000    0.000000    -0.000000   0.000000    
0.000000    -0.000000   -0.000000   0.000000    -0.000000   0.000000    -0.000000   0.000000    -0.000000   0.000000    
0.000000    -0.000000   -0.000000   0.000000    -0.000000   0.000000    -0.000000   0.000000    0.000000    -0.000000   
0.000000    -0.000000   0.000000    0.000000    0.000000    0.000000    -0.000000   -0.000000   0.000000    0.000000    
0.000000    -0.000000   0.000000    -0.000000   0.000000    -0.000000   -0.000000   -0.000000   0.000000    -0.000000   

3D DCT result: 7 plane
0.000000    -0.000000   0.000000    -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   0.000000    -0.000000   
-0.000000   -0.000000   0.000000    0.000000    0.000000    0.000000    0.000000    -0.000000   0.000000    0.000000    
0.000000    -0.000000   -0.000000   0.000000    -0.000000   -0.000000   0.000000    0.000000    -0.000000   -0.000000   
-0.000000   -0.000000   -0.000000   0.000000    0.000000    -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   
0.000000    0.000000    -0.000000   0.000000    0.000000    0.000000    0.000000    0.000000    -0.000000   -0.000000   
-0.000000   0.000000    0.000000    -0.000000   0.000000    0.000000    0.000000    -0.000000   -0.000000   -0.000000   
-0.000000   -0.000000   0.000000    0.000000    0.000000    0.000000    -0.000000   0.000000    -0.000000   -0.000000   
0.000000    -0.000000   0.000000    0.000000    0.000000    0.000000    -0.000000   -0.000000   0.000000    0.000000    
0.000000    -0.000000   -0.000000   0.000000    0.000000    0.000000    0.000000    -0.000000   0.000000    -0.000000   
-0.000000   -0.000000   -0.000000   -0.000000   0.000000    0.000000    0.000000    -0.000000   0.000000    0.000000    

3D DCT result: 8 plane
-0.114371   -0.000000   -0.000000   0.000000    -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   
-0.000000   -0.000000   -0.000000   0.000000    -0.000000   -0.000000   0.000000    0.000000    0.000000    -0.000000   
-0.000000   -0.000000   0.000000    0.000000    0.000000    -0.000000   0.000000    -0.000000   0.000000    0.000000    
-0.000000   0.000000    0.000000    -0.000000   -0.000000   0.000000    -0.000000   -0.000000   -0.000000   0.000000    
0.000000    -0.000000   -0.000000   -0.000000   0.000000    0.000000    -0.000000   -0.000000   -0.000000   -0.000000   
-0.000000   0.000000    0.000000    0.000000    -0.000000   -0.000000   0.000000    -0.000000   -0.000000   0.000000    
-0.000000   0.000000    0.000000    -0.000000   0.000000    -0.000000   0.000000    -0.000000   0.000000    0.000000    
-0.000000   0.000000    -0.000000   0.000000    -0.000000   -0.000000   -0.000000   0.000000    -0.000000   -0.000000   
-0.000000   0.000000    0.000000    -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   0.000000    
0.000000    -0.000000   0.000000    0.000000    0.000000    -0.000000   -0.000000   0.000000    0.000000    0.000000    

3D DCT result: 9 plane
-0.000000   0.000000    0.000000    0.000000    -0.000000   0.000000    0.000000    0.000000    -0.000000   -0.000000   
0.000000    0.000000    0.000000    0.000000    -0.000000   0.000000    -0.000000   0.000000    0.000000    0.000000    
0.000000    -0.000000   0.000000    -0.000000   0.000000    -0.000000   0.000000    0.000000    -0.000000   -0.000000   
0.000000    0.000000    0.000000    -0.000000   -0.000000   0.000000    -0.000000   -0.000000   0.000000    -0.000000   
-0.000000   0.000000    0.000000    0.000000    -0.000000   -0.000000   0.000000    -0.000000   0.000000    -0.000000   
0.000000    -0.000000   -0.000000   0.000000    -0.000000   -0.000000   -0.000000   0.000000    0.000000    0.000000    
-0.000000   0.000000    0.000000    -0.000000   -0.000000   -0.000000   0.000000    -0.000000   0.000000    -0.000000   
0.000000    0.000000    0.000000    0.000000    0.000000    -0.000000   0.000000    -0.000000   0.000000    -0.000000   
-0.000000   -0.000000   0.000000    0.000000    -0.000000   0.000000    -0.000000   0.000000    -0.000000   -0.000000   
0.000000    -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   0.000000    0.000000    0.000000    0.000000    

3D DCT result: 10 plane
-0.032072   -0.000000   0.000000    -0.000000   -0.000000   0.000000    0.000000    0.000000    -0.000000   0.000000    
-0.000000   -0.000000   -0.000000   0.000000    -0.000000   -0.000000   0.000000    -0.000000   -0.000000   0.000000    
0.000000    0.000000    0.000000    -0.000000   -0.000000   0.000000    0.000000    0.000000    0.000000    0.000000    
-0.000000   0.000000    0.000000    -0.000000   -0.000000   0.000000    0.000000    -0.000000   0.000000    0.000000    
-0.000000   -0.000000   -0.000000   0.000000    0.000000    0.000000    -0.000000   -0.000000   -0.000000   0.000000    
-0.000000   -0.000000   0.000000    -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   -0.000000   
0.000000    0.000000    -0.000000   -0.000000   0.000000    -0.000000   0.000000    0.000000    -0.000000   -0.000000   
-0.000000   0.000000    -0.000000   -0.000000   0.000000    -0.000000   0.000000    -0.000000   -0.000000   0.000000    
0.000000    -0.000000   0.000000    -0.000000   0.000000    0.000000    0.000000    -0.000000   -0.000000   0.000000    
-0.000000   0.000000    -0.000000   0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    

If there is any possible improvement, please let me know.

query performance – Why does a GIST index on a cube column in PostgreSQL actually make K-Nearest Neighbor (KNN) ORDER BY queries worse?

Adding a GIST index actually seems to make K-Nearest Neighbor (KNN) ORDER BY queries on cube columns worse in PostgreSQL. Why would that be, and what can be done about it?

Here’s what I mean. In a PostgreSQL database I have a table whose DDL is create sample (id serial primary key, title text, embedding cube) where the embedding column is an embedding vector of the title obtained with a Google language model. The cube data type is provided by the cube extension, which I have installed. Incidentally, these are titles of Wikipedia articles. In any case, there are 1 million records. I then perform a KNN query with the following query. This query defines distance using the Euclidean distance operator <->, though results are similar for the other two metrics. It does an ORDER BY and applies a LIMIT in order to find 10 Wikipedia articles with “similar” titles (the most similar being the target title itself). That all works fine.

select sample.title, sample.embedding <-> cube('(0.18936706, -0.12455666, -0.31581765, 0.0192692, -0.07364611, 0.07851536, 0.0290586, -0.02582532, -0.03378124, -0.10564457, -0.03903799, 0.08668878, -0.15357816, -0.17793414, -0.01826405, 0.01969068, 0.11386908, 0.1555583, 0.09368557, 0.13697313, -0.05610929, -0.06536788, -0.12212707, 0.26356605, -0.06004387, -0.01966437, -0.1250324, -0.16645767, -0.13525756, 0.22482251, -0.1709727, 0.28966117, -0.07927769, -0.02498624, -0.10018375, -0.10923951, 0.04770213, 0.11573371, 0.04619929, 0.05216618, 0.19176421, 0.12948817, 0.08719034, -0.16109011, -0.02411379, -0.05638905, -0.37334979, 0.31225419, 0.0744801, 0.27044332)') distance from sample order by distance limit 10;

What’s puzzling to me, however, is that, if I put a GIST index on the embedding column, the query performance actually is worse. Adding the index, the query plan changes as expected, in the way expected, insofar as it uses the index. But…it gets slower!

This seems to run contrary to the documentation for cube which states:

In addition, a cube GiST index can be used to find nearest neighbors using the metric operators <->, <#>, and <=> in ORDER BY clauses

They even provide an example query, which is very similar to mine.

SELECT c FROM test ORDER BY c <-> cube(array(0.5,0.5,0.5)) LIMIT 1

Here’s the query plan and timing info before dropping the index.

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Limit  (cost=0.41..6.30 rows=10 width=29)
   ->  Index Scan using sample_embedding_idx on sample  (cost=0.41..589360.33 rows=999996 width=29)
         Order By: (embedding <-> '(0.18936706, -0.12455666, -0.31581765, 0.0192692, -0.07364611, 0.07851536, 0.0290586, -0.02582532, -0.03378124, -0.10564457, -0.03903799, 0.08668878, -0.15357816, -0.17793414, -0.01826405, 0.01969068, 0.11386908, 0.1555583, 0.09368557, 0.13697313, -0.05610929, -0.06536788, -0.12212707, 0.26356605, -0.06004387, -0.01966437, -0.1250324, -0.16645767, -0.13525756, 0.22482251, -0.1709727, 0.28966117, -0.07927769, -0.02498624, -0.10018375, -0.10923951, 0.04770213, 0.11573371, 0.04619929, 0.05216618, 0.19176421, 0.12948817, 0.08719034, -0.16109011, -0.02411379, -0.05638905, -0.37334979, 0.31225419, 0.0744801, 0.27044332)'::cube)
(3 rows)

        title         |      distance      
----------------------+--------------------
 david petrarca       | 0.5866321762629475
 david adamski        | 0.5866321762629475
 richard ansdell      | 0.6239883862603475
 linda darke          | 0.6392124797481789
 ilias tsiliggiris    | 0.6996660649119893
 watson, jim          | 0.7059481479504834
 sk radni%c4%8dki     |   0.71718948226995
 burnham, pa          | 0.7384858030758069
 arthur (europa-park) | 0.7468462897336924
 ivan kecojevic       | 0.7488206082281348
(10 rows)

Time: 1226.457 ms (00:01.226)

And, here’s the query plan and timing info after dropping the index.


 Limit  (cost=74036.32..74037.48 rows=10 width=29)
   ->  Gather Merge  (cost=74036.32..171264.94 rows=833330 width=29)
         Workers Planned: 2
         ->  Sort  (cost=73036.29..74077.96 rows=416665 width=29)
               Sort Key: ((embedding <-> '(0.18936706, -0.12455666, -0.31581765, 0.0192692, -0.07364611, 0.07851536, 0.0290586, -0.02582532, -0.03378124, -0.10564457, -0.03903799, 0.08668878, -0.15357816, -0.17793414, -0.01826405, 0.01969068, 0.11386908, 0.1555583, 0.09368557, 0.13697313, -0.05610929, -0.06536788, -0.12212707, 0.26356605, -0.06004387, -0.01966437, -0.1250324, -0.16645767, -0.13525756, 0.22482251, -0.1709727, 0.28966117, -0.07927769, -0.02498624, -0.10018375, -0.10923951, 0.04770213, 0.11573371, 0.04619929, 0.05216618, 0.19176421, 0.12948817, 0.08719034, -0.16109011, -0.02411379, -0.05638905, -0.37334979, 0.31225419, 0.0744801, 0.27044332)'::cube))
               ->  Parallel Seq Scan on sample  (cost=0.00..64032.31 rows=416665 width=29)
(6 rows)

        title         |      distance      
----------------------+--------------------
 david petrarca       | 0.5866321762629475
 david adamski        | 0.5866321762629475
 richard ansdell      | 0.6239883862603475
 linda darke          | 0.6392124797481789
 ilias tsiliggiris    | 0.6996660649119893
 watson, jim          | 0.7059481479504834
 sk radni%c4%8dki     |   0.71718948226995
 burnham, pa          | 0.7384858030758069
 arthur (europa-park) | 0.7468462897336924
 ivan kecojevic       | 0.7488206082281348
(10 rows)

Time: 381.419 ms

Notice:

  • With Index: 1226.457 ms
  • Without Index: 381.419 ms

This very puzzling behavior! All of it is documented in a GitHub repo so that others can try it. I’ll add documentation about how to generate the embedding vectors, but that shouldn’t be needed, as in the Quick-Start I show that pre-computed embedding vectors can be downloaded from my Google Drive folder.

Addendum

It was asked in the comments below to provide the output of explain (analyze, buffers). Here that is, where

  1. I re-create the (covering) index
  2. I run the query with explain (analyze, buffers)
  3. I drop the index
  4. I run the query with explain (analyze, buffers) again
pgbench=# create index on sample using gist (embedding) include (title);
CREATE INDEX
Time: 51966.315 ms (00:51.966)
pgbench=# 


 Limit  (cost=0.41..4.15 rows=10 width=29) (actual time=3215.956..3216.667 rows=10 loops=1)
   Buffers: shared hit=1439 read=87004 written=7789
   ->  Index Only Scan using sample_embedding_title_idx on sample  (cost=0.41..373768.39 rows=999999 width=29) (actual time=3215.932..3216.441 rows=10 loops=1)
         Order By: (embedding <-> '(0.18936706, -0.12455666, -0.31581765, 0.0192692, -0.07364611, 0.07851536, 0.0290586, -0.02582532, -0.03378124, -0.10564457, -0.03903799, 0.08668878, -0.15357816, -0.17793414, -0.01826405, 0.01969068, 0.11386908, 0.1555583, 0.09368557, 0.13697313, -0.05610929, -0.06536788, -0.12212707, 0.26356605, -0.06004387, -0.01966437, -0.1250324, -0.16645767, -0.13525756, 0.22482251, -0.1709727, 0.28966117, -0.07927769, -0.02498624, -0.10018375, -0.10923951, 0.04770213, 0.11573371, 0.04619929, 0.05216618, 0.19176421, 0.12948817, 0.08719034, -0.16109011, -0.02411379, -0.05638905, -0.37334979, 0.31225419, 0.0744801, 0.27044332)'::cube)
         Heap Fetches: 0
         Buffers: shared hit=1439 read=87004 written=7789
 Planning:
   Buffers: shared hit=14 read=6 dirtied=2
 Planning Time: 0.432 ms
 Execution Time: 3316.266 ms
(10 rows)

Time: 3318.333 ms (00:03.318)
pgbench=# drop index sample_embedding_title_idx;
DROP INDEX
Time: 182.324 ms
pgbench=# 


 Limit  (cost=74036.35..74037.52 rows=10 width=29) (actual time=6052.845..6057.210 rows=10 loops=1)
   Buffers: shared hit=70 read=58830
   ->  Gather Merge  (cost=74036.35..171265.21 rows=833332 width=29) (actual time=6052.825..6057.021 rows=10 loops=1)
         Workers Planned: 2
         Workers Launched: 2
         Buffers: shared hit=70 read=58830
         ->  Sort  (cost=73036.33..74077.99 rows=416666 width=29) (actual time=6002.928..6003.019 rows=8 loops=3)
               Sort Key: ((embedding <-> '(0.18936706, -0.12455666, -0.31581765, 0.0192692, -0.07364611, 0.07851536, 0.0290586, -0.02582532, -0.03378124, -0.10564457, -0.03903799, 0.08668878, -0.15357816, -0.17793414, -0.01826405, 0.01969068, 0.11386908, 0.1555583, 0.09368557, 0.13697313, -0.05610929, -0.06536788, -0.12212707, 0.26356605, -0.06004387, -0.01966437, -0.1250324, -0.16645767, -0.13525756, 0.22482251, -0.1709727, 0.28966117, -0.07927769, -0.02498624, -0.10018375, -0.10923951, 0.04770213, 0.11573371, 0.04619929, 0.05216618, 0.19176421, 0.12948817, 0.08719034, -0.16109011, -0.02411379, -0.05638905, -0.37334979, 0.31225419, 0.0744801, 0.27044332)'::cube))
               Sort Method: top-N heapsort  Memory: 26kB
               Buffers: shared hit=70 read=58830
               Worker 0:  Sort Method: top-N heapsort  Memory: 26kB
               Worker 1:  Sort Method: top-N heapsort  Memory: 26kB
               ->  Parallel Seq Scan on sample  (cost=0.00..64032.33 rows=416666 width=29) (actual time=0.024..3090.103 rows=333333 loops=3)
                     Buffers: shared read=58824
 Planning:
   Buffers: shared hit=3 read=3 dirtied=1
 Planning Time: 0.129 ms
 Execution Time: 6057.388 ms
(18 rows)

Time: 6053.284 ms (00:06.053)