This site
包含有关公职人员的信息。我想获取其中的数据,使用的工具是
requests
year=2015
关于
Experiencia laboral
ADMINISTRACION DEL PATRIMONIO DE LA BENEFICENCIA PUBLICA
consultar información
我们得到以下面板:
在该面板中,我可以点击数字
250
(它是以下类别的结果数)
Experiencia Laboral
最后通过确认下载来下载文件。
# Form payload for the search button: selects the agency and the year and
# asks the server to re-render the results panel.
params = {
    "form:catDepcia_input": "12950:ADMINISTRACION DEL PATRIMONIO DE LA BENEFICENCIA PUBLICA",
    "form:aniosSelect_target": "2015",
    "javax.faces.partial.ajax": "true",
    "javax.faces.source": "form:idBusqArch",
    "javax.faces.partial.execute": "form:idBusqArch form:catDepcia form:idDec form:aniosSelect form:IdlstBusq",
    "javax.faces.partial.render": "form:idResBusq form:messages form:idPanelDetExistencias",
    "form:idBusqArch": "form:idBusqArch",
}

# Form payload for the "confirm" button.
params_2 = {
    "javax.faces.partial.ajax": "true",
    "javax.faces.source": "form:idButtonConfirmar",
    "javax.faces.partial.execute": "form:idButtonConfirmar",
    "javax.faces.partial.render": "form:idResBusq",
    "form:idButtonConfirmar": "form:idButtonConfirmar",
}

# Form payload for the file link in row 5 of the results grid.
params_3 = {
    "javax.faces.partial.ajax": "true",
    "javax.faces.source": "form:idDtgrid23:5:idLinkFile",
    "javax.faces.partial.execute": "form:idDtgrid23:5:idLinkFile",
    "javax.faces.partial.render": "form:idPanelEncZip",
    "form:idDtgrid23:5:idLinkFile": "form:idDtgrid23:5:idLinkFile",
}

# Request headers sent with every partial (AJAX) POST.
head = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
    "Faces-Request": "partial/ajax",
}

url = "http://www.servidorespublicos.gob.mx/registro/datosAbiertos.jsf"

with requests.Session() as s:
    # Load the page once to obtain the ViewState token from its hidden input.
    init = s.get(url)
    soup = BeautifulSoup(init.text, "lxml")
    view_state_input = soup.find("input", {"id": "j_id1:javax.faces.ViewState:0"})
    val = view_state_input["value"]

    # Every payload carries the same ViewState value.
    for payload in (params, params_2, params_3):
        payload["javax.faces.ViewState"] = val

    # Replay the three clicks in order: search, confirm, file link.
    r1, r2, r3 = (
        s.post(url, data=payload, headers=head)
        for payload in (params, params_2, params_3)
    )
    print(r3.content)
当我点击“确认下载”(ConfirmDownload)时,可以看到最后一个POST请求使用了不同的请求头(headers)。但是当我用自己的代码执行同样的操作时,并没有得到zip文件。为此,我在前面的代码基础上添加了以下内容:
# Request headers for the final, non-AJAX download POST.
#
# Only genuine *request* headers belong here. "Content-Disposition" and
# "Set-Cookie" are response headers, and forcing "Content-Type" to
# "application/zip" would replace the form encoding that requests sets
# automatically for `data=`, so the server would be unlikely to parse the
# form fields (and would answer with the HTML page instead of the zip).
head_2 = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
    "Cache-Control": "max-age=0",
    "Upgrade-Insecure-Requests": "1",
}

# Form payload for the "confirm zip download" button (a regular form submit,
# not a partial/ajax request).
params_4 = {
    "form:catDepcia_input": "12950:ADMINISTRACION DEL PATRIMONIO DE LA BENEFICENCIA PUBLICA",
    "form:aniosSelect_target": "2015",
    "form:idButtonConfirmarZip": "",
    "form:IdlstBusq_filter": "",
    "form:nombreBusq1": "",
}

with requests.Session() as s:
    # Load the page once to obtain the ViewState token from its hidden input.
    init = s.get(url)
    soup = BeautifulSoup(init.text, "lxml")
    val = soup.find("input", {"id": "j_id1:javax.faces.ViewState:0"})["value"]

    # Every payload carries the same ViewState value.
    params["javax.faces.ViewState"] = val
    params_2["javax.faces.ViewState"] = val
    params_3["javax.faces.ViewState"] = val
    params_4["javax.faces.ViewState"] = val

    # Replay the browser's clicks in order: search, confirm, file link,
    # then the actual download submit.
    r1 = s.post(url, data=params, headers=head)
    r2 = s.post(url, data=params_2, headers=head)
    r3 = s.post(url, data=params_3, headers=head)
    r4 = s.post(url, data=params_4, headers=head_2)

    # Response exposes `headers` (case-insensitive mapping); `header` does
    # not exist and would raise AttributeError.
    print(r4.headers['content-type'])
输出结果是
text/html;charset=UTF-8
而不是
application/zip