代码之家  ›  专栏  ›  技术社区  ›  Wizhi

用VBA刮Web后删除Web连接时出现错误80010108

  •  0
  • Wizhi  · 技术社区  · 6 年前

    我有以下代码从Web表中检索一些数据。

    Sub Retrieve_ticker_list()
    
        Dim Stockticker As Long                      'loopvalue (URL link) you want to use
    
        Dim DownloadInfoSheet As Worksheet
        Set DownloadInfoSheet = ActiveWorkbook.Worksheets("DownloadInfo")
    
        Dim DataSheet As Worksheet
        Set DataSheet = ActiveWorkbook.Worksheets("Data")
    
        Dim lastrowStock As Long
        Dim lastrowG As Long
    
        Dim baseURL As String
        Dim searchResultsURL As String
    
        lastrowStock = DownloadInfoSheet.Cells(Rows.Count, "C").End(xlUp).Row 'Find last row in Stockticker
        lastrowG = DataSheet.Cells(Rows.Count, "A").End(xlUp).Row + 10 'Find last row in range PART3
    
    
        For Stockticker = 2 To lastrowStock          'Loop from page 2 to lastrow
    
            baseURL = DownloadInfoSheet.Cells(2, "A") 'download from cell A2: 
            searchResultsURL = baseURL & DownloadInfoSheet.Cells(Stockticker, "C").Value 'Add the ticker symbol to the original URL link, example             
            With DataSheet _
                 .QueryTables.Add(Connection:="URL;" & searchResultsURL, Destination:=DataSheet.Range(DataSheet.Cells(1, "A"), DataSheet.Cells(lastrowG, "A")))
                .Name = _
                      "Stock Data"
                .BackgroundQuery = True
                .RefreshStyle = xlOverwriteCells
                .TablesOnlyFromHTML = True
                .WebSelectionType = xlSpecifiedTables
                .WebTables = """Rf"""
                .PreserveFormatting = True
                .Refresh BackgroundQuery:=False
                Call Delete_Query_Content_Data 'See code below. I have tried to have this inside and outside the "with" loop
            End With
    
            Call RunProcess 'calculate adjusted key-ratios
        Next Stockticker
    
    End Sub
    

    尝试删除连接时出现问题。如果Web表存在并被粘贴到工作簿中,那么我可以删除与下面代码的连接,而不会有问题。

    但是,当URL不正确(stockticker名称无效)时,代码会起作用(粘贴空白数据),但我无法删除连接。我可以手动转到“数据”->“连接”->“删除”,但它不适用于代码。连接未被移除(如果 lastrowG = 1 )或者我得到以下错误( lastrowG = ... row + 10 ):

    enter image description here

    删除表查询连接的代码:

    Sub Delete_Query_Content_Data()
    ' This code works when the URL code is valid, however if the code has an invalid stockticker (i.e. ADPA)
     'it doesn't remove the connection (if I set lastrowG = 1) otherwise it mostly gives the error message 80010108 
    'Clear Web Query for "Stock data"
    Sheets("Data").Activate
    Range("A1").Select
    Selection.QueryTable.Delete
    Selection.ClearContents
    
    End Sub
    

    我想换一个 Delete_Query_Content_Data 有StackOverflow的建议(示例“ Killing connection in EXCEL vba “和” Excel VBA Export to Excel - Removing Connections “)但是它们都不能解决我的问题,我仍然收到错误消息。

    2 回复  |  直到 6 年前
        1
  •  1
  •   QHarr    6 年前

    我将研究实际的XMLHTTP请求作为更快的检索方法。目前,虽然有点不寻常,但请看到你的答案有结构性的重写,并附上一些注释。

    注:

    1)将IE对象移出循环并在循环前可见。其他一些不受循环影响的变量也一样,例如baseurl。

    2)将过去52周的高、低值选择减少到使用CSS选择器以适当的元素为目标。

    3)在适当情况下与语句一起使用,例如用于确定lastrowstocktickerpe

    4)取消不必要的额外等待

    5)移除集合=当超出范围时,将取消引用任何不需要的对象。

    Option Explicit  
    Public Sub Retrieve_PE_Low_High()
        Dim DownloadInfoSheet As Worksheet, OutputSheet As Worksheet
        Dim Stockticker As Long, lastrowStockTickerPE As Long
        Dim baseURL As String, searchResultsURL As String
        Dim HTMLDoc As HTMLDocument, oIE As InternetExplorer
    
        Set DownloadInfoSheet = ActiveWorkbook.Worksheets("DownloadInfo") 'Sheet to retrieve stockticker from
        Set OutputSheet = ActiveWorkbook.Worksheets("Output") 'Where data will be assigned
        Set oIE = New InternetExplorer
        baseURL = DownloadInfoSheet.Cells(3, "A")
    
        With DownloadInfoSheet
            lastrowStockTickerPE = .Cells(.Rows.Count, "D").End(xlUp).Row
        End With
    
        With oIE
            .Visible = True
    
            For Stockticker = lastrowStockTickerPE To lastrowStockTickerPE '<==presumably your endpoint is not always the same as start
    
                searchResultsURL = baseURL & DownloadInfoSheet.Cells(Stockticker, "C").Value 'Add the ticker symbol to the original URL link, example "https://www.nasdaq.com/symbol" + "ADP" = "https://www.nasdaq.com/symbol/ADP"
    
                .Navigate2 searchResultsURL
    
                While .Busy Or .readyState < 4: DoEvents: Wend
    
                Set HTMLDoc = .document
    
                Dim high As Object, low As Object
                With HTMLDoc
                    On Error Resume Next
                    Set high = .querySelector(".infoTable.trading-activitiy tr + tr td:last-child")
                    Set low = .querySelector(".infoTable.trading-activitiy tr + tr + tr td:last-child")
                    Debug.Print high.innerText, low.innerText
                    On Error GoTo 0
                    If high Is Nothing Or low Is Nothing Then
                        'dummy
                    Else
                        'other code to write to sheet
                    End If
                End With
                Set high = Nothing: low = Nothing
            Next Stockticker
            .Quit
        End With
    End Sub
    

    一个示例xmlhttp请求,您可以使用上面的IDEA将其适应到循环中。有趣的是,针对元素的CSS选择器必须稍微调整一下。

    Option Explicit   
    Public Sub GetInfo()
        Dim sResponse As String, html As HTMLDocument, high As Object, low As Object
        Set html = New HTMLDocument
        With CreateObject("MSXML2.XMLHTTP")
            .Open "GET", "https://www.nasdaq.com/symbol/AAPL", False
            .send
            html.body.innerHTML = StrConv(.responseBody, vbUnicode)
        End With
    
        On Error Resume Next
        Set high = html.querySelector(".infoTable.trading-activitiy tr + tr td + td")
        Set low = html.querySelector(".infoTable.trading-activitiy tr + tr + tr td + td")
        Debug.Print high.innerText, low.innerText
        On Error GoTo 0
    End Sub
    
        2
  •  0
  •   Wizhi    6 年前

    如果有其他人能从中受益,我会强烈建议你寻找这个职位: web scraping with vba using XMLHTTP

    这是我对格雷厄姆·安德森提供的代码的解释。

    我补充说:

    • 循环URL地址扩展(即nasdaq.com/symbol/loop this ticker)。
    • 添加了一个简单的错误处理程序(它会跳过错误,留下注释并继续),以避免中断。
    • 指示代码仅将特定元素复制回工作表。(节省时间,而不是打印整个表,然后查找要使用的值)

    与Excel Web导入(问题中的代码)相比,使用HTML/XMLHTTP的好处在于 数值 直接正确识别 . 与 QueryTables 因为数字是美国格式,所以我丢失了零。 . “当我使用时用作分隔符” , “”。通过下面的代码,这些数字从一开始就可以很好地显示出来,这节省了很多时间。

    Sub Retrieve_PE_Low_High()
    Dim Stockticker As Long 'loopvalue (URL extension to link) you want to use
    Dim DownloadInfoSheet As Worksheet
    Set DownloadInfoSheet = ActiveWorkbook.Worksheets("DownloadInfo") 'Sheet to retrieve stockticker from
    
    Dim OutputSheet As Worksheet
    Set OutputSheet = ActiveWorkbook.Worksheets("Output") 'Where data will be assigned
    Dim lastrowB As Long
    
    Dim lastrowStockTickerPE As Long
    Dim lastrowStockPE As Long
    
    Dim baseURL As String
    Dim searchResultsURL As String
    
    lastrowStockTickerPE = DownloadInfoSheet.Cells(Rows.Count, "D").End(xlUp).Row 'Find last row in Stockticker
    
    For Stockticker = lastrowStockTickerPE To lastrowStockTickerPE 'Loop from page 2 to lastrow
        baseURL = DownloadInfoSheet.Cells(3, "A") 'download from cell A2: https://www.nasdaq.com/symbol
        searchResultsURL = baseURL & DownloadInfoSheet.Cells(Stockticker, "C").Value 'Add the ticker symbol to the original URL link, example "https://www.nasdaq.com/symbol" + "ADP" = "https://www.nasdaq.com/symbol/ADP"
    
        '#Microsoft HTML Object Library - Activate by Tools -> References
        '#Microsoft Internet Controls - Activate by Tools -> References
        Dim HTMLDoc As New HTMLDocument
        Dim AnchorLinks As Object
        Dim TDelements As Object
        Dim tdElement As Object
        Dim AnchorLink As Object
        Dim lRow As Long
        Dim lCol As Long
        Dim oElement As Object
        Dim i As Integer
    
        Dim oIE As InternetExplorer
    
        Set oIE = New InternetExplorer
    
        oIE.navigate searchResultsURL
        oIE.Visible = True
    
        'Wait for IE to load the web page
        Do Until (oIE.readyState = 4 And Not oIE.Busy)
            DoEvents
        Loop
    
        'Wait for Javascript to run
        Application.Wait (Now + TimeValue("0:00:15"))
    
        HTMLDoc.body.innerHTML = oIE.document.body.innerHTML
    
    
        With HTMLDoc.body
            Set AnchorLinks = HTMLDoc.getElementsByClassName("infoTable trading-activitiy") 'The "getElementsByClassName" is found by righ-click the element you want to retrieve data. This is the higher node (table)
    
            For Each AnchorLink In AnchorLinks
                'Debug.Print AnchorLink.innerText
                Set TDelements = AnchorLink.getElementsByTagName("td") 'The "getElementsByTagName" is found by righ-click the element you want to retrieve data. This is the lower node (single value)
            Next AnchorLink
    
            'lRow = 1
            'Print complete table of "infoTable trading-activitiy" to see what each element has for row.
            'If "High/Low 52 wk price" prints out at row 99, then the element index number is 98.
            'You could also search for items by: Debug.Print TDelements.Item(i).innerText, where i = a number
            'For Each tdElement In TDelements
            '    Debug.Print tdElement.innerText
            '    Cells(lRow, 1).Value = tdElement.innerText
            '    lRow = lRow + 1
            'Next tdElement
    
            If TDelements Is Nothing Then
                Call Dummy_PE                    'If object "TDelements is not populated/nothing (i.e. URL is not working or getElementsByClassName is not found) go to Dummy_PE
            Else
                lastrowStockPE = OutputSheet.Cells(Rows.Count, "G").End(xlUp).Row 'Find last row in Stockticker
                For i = 5 To 3 Step -1           'Loop through the TDelements items 5 to 3
                    Select Case i
                    Case 3, 5                    'For TDelements items 3 and 5, copy those to the sheet
                        'Debug.Print TDelements.Item(i).innerText
                        OutputSheet.Cells(lastrowStockPE - 1, 6).Value = TDelements.Item(i).innerText
                        OutputSheet.Cells(lastrowStockPE - 1, 6).NumberFormat = "General"
                        OutputSheet.Cells(lastrowStockPE - 1, 7).ClearContents
                        If OutputSheet.Cells(lastrowStockPE - 1, 6).Value = "" Then
                            OutputSheet.Cells(lastrowStockPE - 1, 2).Font.Color = vbRed
                        End If
                        lastrowStockPE = lastrowStockPE + 1
                    End Select
                Next i
    
            End If
    
        End With
    
        oIE.Quit
    
        Set AnchorLinks = Nothing
        Set AnchorLink = Nothing
        Set TDelements = Nothing
        Set tdElement = Nothing
        Set HTMLDoc = Nothing
        Set olE = Nothing
    
    Next Stockticker
    End Sub