代码之家  ›  专栏  ›  技术社区  ›  lampShadesDrifter

ElasticSearch查询,用于获取图的邻接列表的所有节点(用javascript构建图)

  •  0
  • lampShadesDrifter  · 技术社区  · 7 年前

    在索引中有一组ES文档,每个文档都类似

    {
        currId: '<some id>',
        prevId: '<some id>',
        parents: ['NULL']
    }
    

    其中(为了简化这个示例问题) prevId 只有一个父级时使用,否则使用 parents (事实上,它的设计是这样的还有其他原因)。

    所以一套文件

    {currId: '0', prevId: 'NULL', parents: ['NULL']}
    {currId: '1', prevId: '0', parents: ['NULL']},
    {currId: '2', prevId: '0', parents: ['NULL']},
    {currId: '3', prevId: 'NULL', parents: ['1', '2']},
    {currId: '4', prevId: 'NULL', parents: ['1', '2']}
    

    暗示图形

       |-- 1 -- |__3
    0--         |__
       |-- 2 -- |  4
    

    (如果ascii艺术不是很好(换句话说:0是1和2的父级,它们都是3和4的父级),则表示抱歉)。基本上,每个节点都有两个字段,组合在一起,包含该节点父节点的完整列表。 请注意,索引中可能有其他文档未连接到此处显示的图表,我希望在查询中忽略这些文档。 .

    对ES来说很陌生,所以很难想到从哪里开始考虑这个问题。有没有一种方法,我可以查询得到所有文档,组成一个连接的组件图,只使用知识 单个,随机 该组件中的文档(不一定是以上示例中以零为根的图)(例如,仅给出单个随机选择节点的信息,获取所有其他连接节点)?

    **注意,这是在通过AXIOS向ES服务器发出的JavaScript REST API请求中完成的。( https://github.com/axios/axios ,这样就可以使用for循环之类的东西(但如果可能的话,我会尽量避免这样做,只使用一个查询(尽管我不知道这里的性能权衡是什么)。

    2 回复  |  直到 7 年前
        1
  •  0
  •   Ryan Walker    7 年前

    {
        "query": {
            "bool": {
                "should": [
                        {"term": {"prevId":0}},
                        {"term": {"parents":0}}
                    ]
            }
        }
    }
    
        2
  •  0
  •   lampShadesDrifter    7 年前

    因为我比ElasticSearch查询语言更习惯于JavaScript,所以我最终只使用了一个DFS实现。( https://en.wikipedia.org/wiki/Depth-first_search this.selectedNode 一次是为了得到所有的父母(和其他祖先),另一次是为了得到所有的孩子。代码如下(有点冗长,但需要注意的主要功能是 buildGraph _dfs 在此获取算法的要点(还要注意,使用字段

    async function _getParents(child) {//see https://tutorialzine.com/2017/07/javascript-async-await-explained
        // queries our main DB of nodes to get all nodes n with... 
        // n.currId = child.prevId or child.sources.includes(n.currId)
        let data = {
            "sort": {"timestamp": {"order": "desc"}},
            "query": {
                "bool": {
                    "should": [
                        {"match": {"currId": child.prevId}},
                        //{"match": {"currId": child.sources}}
                    ],
                    "should": child.sources.map((id) => {return {"match": {"currId": id}}})
                }//TODO: use pure ES query to match against ES sources array
            }
        }
    
        let nodes = []
        let res = await axios.get("http://myesserver:9200/myindex/mytype/_search", {
                // for submiting to elasticsearch, see https://stackoverflow.com/a/45292081/8236733
                params: {
                    source: JSON.stringify(data),
                    source_content_type: 'application/json'
                }
            })
    
        console.log('NEW PARENT NODES')
        nodes = res['data']['hits']['hits'].map(record => {return record['_source']} )
        console.log(nodes)
        return nodes
    }
    async function _getChildren(parent) {// see https://tutorialzine.com/2017/07/javascript-async-await-explained
        // queries our main DB of nodes to get all nodes n with... 
        // n.prevId == parent.currId or n.sources.includes(parent.currId) 
        let data = {
            "sort": {"timestamp": {"order": "desc"}},
            "query": {
                "bool": {
                    "should": [
                        {"match": {"prevId": parent.currId}},
                        {"match": {"sources": parent.currId}}
                    ]
                }
            }
        }
    
        let nodes = []
        let res = await axios.get("http://myesserver:9200/myindex/mytype/_search", {
                // for submiting to elasticsearch, see https://stackoverflow.com/a/45292081/8236733
                params: {
                    source: JSON.stringify(data),
                    source_content_type: 'application/json'
                }
            })
    
        console.log('NEW CHILD NODES')
        nodes = res['data']['hits']['hits'].map(record => {return record['_source']} )
        console.log(nodes)
        return nodes
    }
    /**
     *
     *
     * @param {*} root
     * @param {(v)=>{}} stopper callback for conditional to determine whether to visit a node
     * @param {(v)=>{}} visit callback for what to do when visit each node
     * @param {(v)=>{}} getNext callback for how to get the children of a node
     */
    async function _dfs(root, stopper, visit, getNext) {//see https://tutorialzine.com/2017/07/javascript-async-await-explained
        /* getting children */
        /* using DFS method */
        console.log('starting wth root node:')
        console.log(root)
        let s = []
        s.push(root)
        console.log('initialized stack (FIXME: not showing correctly for some reason):')
        console.log(s)
        while (s.length > 0) {
            let v = s.pop()
            console.log('popped:')
            console.log(v)
            console.log(v.sources)
            if (stopper(v)) {
                /* save this node for the graph */
                visit(v)                          
                /* get the children of this node */
                let kids = await getNext(v)                     
                s = s.concat(kids)
                console.log('concated stack:')
                console.log(s)
            }
        }
    }
    /**
     *
     *
     * @param {*} startn the node of the graph that we initially start with
     * @param {*} nodes
     * @param {*} edges
     */
    async function buildGraph(startn, nodes, edges) {
        // Had to do async all the way down b/c of the async axios requests to the ES server
        //see https://tutorialzine.com/2017/07/javascript-async-await-explained
    
        /* getting ancestors */
        // if wanted to get siblings as well, this would be the DFS branch to implement that in
        let discovered = []
        await _dfs(startn, 
            (v)=>{return !discovered.includes(v.currId)}, 
            (v)=>{
                discovered.push(v.currId)
                nodes.push({id: v.currId, label: v.currLocation})
                if (v.prevId != 'NULL') {
                    edges.push({from: v.prevId, to: v.currId})
                } 
                if (v.sources.length > 0 && v.sources[0] != 'NULL') {
                    for (let i=0; i < v.sources.length; i++) {
                        edges.push({from: v.sources[i], to: v.currId})
                    }
                }
            }, 
            (v)=>{return _getParents(v)})
    
        console.log('completed collecting ancestor nodes')
        console.log(nodes)
    
        /* getting children */
        // remove root from discovered, so can do another dfs
        for (var i=discovered.length-1; i>=0; i--) {
            if (discovered[i] === startn.currId) {
                discovered.splice(i, 1);
                // break;       //<-- Uncomment  if only the first term has to be removed
            }
        }
        await _dfs(startn, 
            (v)=>{return !discovered.includes(v.currId)}, 
            (v)=>{
                discovered.push(v.currId)
                // can't add origin node with same id to graph again in react-graph-vis
                if (v.currId != startn.currId) {
                    nodes.push({id: v.currId, label: v.currLocation})
                }
                if (v.prevId != 'NULL') {
                    edges.push({from: v.prevId, to: v.currId})
                } 
                if (v.sources.length > 0 && v.sources[0] != 'NULL') {
                    for (let i=0; i < v.sources.length; i++) {
                        edges.push({from: v.sources[i], to: v.currId})
                    }
                }
            }, 
            (v)=>{return _getChildren(v)})
    
        console.log('completed collecting child nodes')
        console.log(nodes)
    }
    
    let nodes = []
    let edges = []
    buildGraph(this.selectedNode, nodes, edges).then(res => {
        console.log('buildGraph promise result:')
        console.log(res, nodes, edges)
        this.setState({nodes: nodes, edges: edges})
    })