如何在 python 脚本中获取多个分页 api?
•浏览 1
How to get a multiple pagination api in a python script?
我学习python,我想使用api。
我制作小程序来获取信息,并阻止我的下一个程序。
我想制作一个程序来抓取我学校 api 上的所有学生。
问题是限制分页,我有 15 个用户,但不是全部。
我在文档上搜索如何使用。
我尝试使用循环"for"和特定的 api 参数获取信息:
The Link HTTP response header contains pagination data with first,
previous, next and last raw pages links when available, under the
format
link: http://xxx.intra.xxx.fr/v2/{Resource}?page=X+1; rel="next",
http://xxx.intra.xxx.fr/v2/{Resource}?page=X-1; rel="prev",
http://xxx.intra.xxx.fr/v2/{Resource}?page=1; rel="first",
http://xxx.intra.xxx.fr/v2/{Resource}?page=X+n; rel="last"
问题,我不知道要抓取所有页面中的所有信息,而不仅仅是第一页。
我的程序代码:
import requests
import json
import sys
from sys import argv
from operator import itemgetter, attrgetter
import json
import os
UID = 'REDACTED_USER_ID'
SECRET = 'REDACTED_SECRET_KEY'
class CONST(object):
try:
campus = sys.argv[1]
month = sys.argv[2]
year = sys.argv[3]
except:
print("Error : aucun argument")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
d = {'grant_type': 'client_credentials', 'client_id': UID, 'client_secret': SECRET}
r = requests.post("https://api.intra.xxx.fr/oauth/token", data=d)
token = r.json()['access_token']
print(token)
h = {'Authorization': 'Bearer ' + token}
get_students = requests.get('https://api.intra.xxx.fr/v2/cursus/1/users?filter[primary_campus_id]=' + campus + '&filter[pool_month]=' + month + '&filter[pool_year]=' + year + '&sort=login', headers=h)
def __setattr__(self, *_):
pass
CONST = CONST()
def campus_id(CAMPUS, MONTH, YEAR, *args):
campus = CAMPUS
if str.isnumeric(campus):
return(campus)
else:
print("Error : campus_id requiert un chiffre")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
def pool_month(CAMPUS, MONTH, YEAR, *args):
month = MONTH
if str.isalpha(month):
return(month)
else:
print("Error : pool_month requiert un mois en lettre")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
def pool_year(CAMPUS, MONTH, YEAR, *args):
year = YEAR
if str.isnumeric(year):
return(year)
else:
print("Error : pool_year requiert au moins un chiffre")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
def get_infos(CAMPUS, MONTH, YEAR, *args):
if len(argv) != 4:
print('Wrongs arguments !')
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
campus = campus_id(*argv[1:], *argv[2:], *argv[3:])
month = pool_month(*argv[1:], *argv[2:], *argv[3:])
year = pool_year(*argv[1:], *argv[2:], *argv[3:])
get_level = CONST.get_students
# print(json.dumps(get_level.json(),indent=4))
level = get_level.json()
tab = []
for item in level:
tab.append(item['login'])
chain = ' \
'.join(tab)
fichier = open("test.out","w")
fichier = open("test.out","a")
fichier.write(chain +"\
")
fichier.close()
if __name__ == '__main__':
test = get_infos(*argv[1:], *argv[2:], *argv[3:])
print(test)
for item in level:
tab.append(item['login'])
> api-test python3 api-test3.py"1""september""2017"
> api-test
> cat test.out
aaiche
abaille
abezanni
abouquet
acourtin
adfourca
adpusel
# ... earlier stuff
h = {'Authorization': 'Bearer ' + token}
params = {
'filter[primary_campus_id]': campus,
'filter[pool_month]': month,
'filter[pool_year]': year,
'sort': 'login'
}
get_students = requests.get('https://api.intra.xxx.fr/v2/cursus/1/users', params=params, headers=h)
# and so on ...
params = { ... } # as before
for page_idx in range(number_of_pages):
params['page'] = page_idx
get_students_page = requests.get(...)
# combine the outputs somehow, but that depends on the response
# and so on...
代码:
import requests
import json
import sys
from sys import argv
from operator import itemgetter, attrgetter
import json
import os
UID = 'REDACTED_USER_ID'
SECRET = 'REDACTED_SECRET_KEY'
class CONST(object):
try:
campus = sys.argv[1]
month = sys.argv[2]
year = sys.argv[3]
except:
print("Error : aucun argument")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
d = {'grant_type': 'client_credentials', 'client_id': UID, 'client_secret': SECRET}
r = requests.post("https://api.intra.xxx.fr/oauth/token", data=d)
token = r.json()['access_token']
print(token)
h = {'Authorization': 'Bearer ' + token}
get_students = requests.get('https://api.intra.xxx.fr/v2/cursus/1/users?filter[primary_campus_id]=' + campus + '&filter[pool_month]=' + month + '&filter[pool_year]=' + year + '&sort=login', headers=h)
def __setattr__(self, *_):
pass
CONST = CONST()
def campus_id(CAMPUS, MONTH, YEAR, *args):
campus = CAMPUS
if str.isnumeric(campus):
return(campus)
else:
print("Error : campus_id requiert un chiffre")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
def pool_month(CAMPUS, MONTH, YEAR, *args):
month = MONTH
if str.isalpha(month):
return(month)
else:
print("Error : pool_month requiert un mois en lettre")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
def pool_year(CAMPUS, MONTH, YEAR, *args):
year = YEAR
if str.isnumeric(year):
return(year)
else:
print("Error : pool_year requiert au moins un chiffre")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
def get_infos(CAMPUS, MONTH, YEAR, *args):
if len(argv) != 4:
print('Wrongs arguments !')
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
campus = campus_id(*argv[1:], *argv[2:], *argv[3:])
month = pool_month(*argv[1:], *argv[2:], *argv[3:])
year = pool_year(*argv[1:], *argv[2:], *argv[3:])
get_level = CONST.get_students
# print(json.dumps(get_level.json(),indent=4))
level = get_level.json()
tab = []
for item in level:
tab.append(item['login'])
chain = ' \
'.join(tab)
fichier = open("test.out","w")
fichier = open("test.out","a")
fichier.write(chain +"\
")
fichier.close()
if __name__ == '__main__':
test = get_infos(*argv[1:], *argv[2:], *argv[3:])
print(test)
for item in level:
tab.append(item['login'])
> api-test python3 api-test3.py"1""september""2017"
> api-test
> cat test.out
aaiche
abaille
abezanni
abouquet
acourtin
adfourca
adpusel
# ... earlier stuff
h = {'Authorization': 'Bearer ' + token}
params = {
'filter[primary_campus_id]': campus,
'filter[pool_month]': month,
'filter[pool_year]': year,
'sort': 'login'
}
get_students = requests.get('https://api.intra.xxx.fr/v2/cursus/1/users', params=params, headers=h)
# and so on ...
params = { ... } # as before
for page_idx in range(number_of_pages):
params['page'] = page_idx
get_students_page = requests.get(...)
# combine the outputs somehow, but that depends on the response
# and so on...
是我找到的登录名的数组列表(例如:暂时我已经 "a" "b" "c" "d"),我想获取所有用户
控制台示例:
import requests
import json
import sys
from sys import argv
from operator import itemgetter, attrgetter
import json
import os
UID = 'REDACTED_USER_ID'
SECRET = 'REDACTED_SECRET_KEY'
class CONST(object):
try:
campus = sys.argv[1]
month = sys.argv[2]
year = sys.argv[3]
except:
print("Error : aucun argument")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
d = {'grant_type': 'client_credentials', 'client_id': UID, 'client_secret': SECRET}
r = requests.post("https://api.intra.xxx.fr/oauth/token", data=d)
token = r.json()['access_token']
print(token)
h = {'Authorization': 'Bearer ' + token}
get_students = requests.get('https://api.intra.xxx.fr/v2/cursus/1/users?filter[primary_campus_id]=' + campus + '&filter[pool_month]=' + month + '&filter[pool_year]=' + year + '&sort=login', headers=h)
def __setattr__(self, *_):
pass
CONST = CONST()
def campus_id(CAMPUS, MONTH, YEAR, *args):
campus = CAMPUS
if str.isnumeric(campus):
return(campus)
else:
print("Error : campus_id requiert un chiffre")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
def pool_month(CAMPUS, MONTH, YEAR, *args):
month = MONTH
if str.isalpha(month):
return(month)
else:
print("Error : pool_month requiert un mois en lettre")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
def pool_year(CAMPUS, MONTH, YEAR, *args):
year = YEAR
if str.isnumeric(year):
return(year)
else:
print("Error : pool_year requiert au moins un chiffre")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
def get_infos(CAMPUS, MONTH, YEAR, *args):
if len(argv) != 4:
print('Wrongs arguments !')
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
campus = campus_id(*argv[1:], *argv[2:], *argv[3:])
month = pool_month(*argv[1:], *argv[2:], *argv[3:])
year = pool_year(*argv[1:], *argv[2:], *argv[3:])
get_level = CONST.get_students
# print(json.dumps(get_level.json(),indent=4))
level = get_level.json()
tab = []
for item in level:
tab.append(item['login'])
chain = ' \
'.join(tab)
fichier = open("test.out","w")
fichier = open("test.out","a")
fichier.write(chain +"\
")
fichier.close()
if __name__ == '__main__':
test = get_infos(*argv[1:], *argv[2:], *argv[3:])
print(test)
for item in level:
tab.append(item['login'])
> api-test python3 api-test3.py"1""september""2017"
> api-test
> cat test.out
aaiche
abaille
abezanni
abouquet
acourtin
adfourca
adpusel
# ... earlier stuff
h = {'Authorization': 'Bearer ' + token}
params = {
'filter[primary_campus_id]': campus,
'filter[pool_month]': month,
'filter[pool_year]': year,
'sort': 'login'
}
get_students = requests.get('https://api.intra.xxx.fr/v2/cursus/1/users', params=params, headers=h)
# and so on ...
params = { ... } # as before
for page_idx in range(number_of_pages):
params['page'] = page_idx
get_students_page = requests.get(...)
# combine the outputs somehow, but that depends on the response
# and so on...
使用 requests.get 函数,您应该在 params 关键字参数中传递查询参数。
所以你会想要像
这样的东西
import requests
import json
import sys
from sys import argv
from operator import itemgetter, attrgetter
import json
import os
UID = 'REDACTED_USER_ID'
SECRET = 'REDACTED_SECRET_KEY'
class CONST(object):
try:
campus = sys.argv[1]
month = sys.argv[2]
year = sys.argv[3]
except:
print("Error : aucun argument")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
d = {'grant_type': 'client_credentials', 'client_id': UID, 'client_secret': SECRET}
r = requests.post("https://api.intra.xxx.fr/oauth/token", data=d)
token = r.json()['access_token']
print(token)
h = {'Authorization': 'Bearer ' + token}
get_students = requests.get('https://api.intra.xxx.fr/v2/cursus/1/users?filter[primary_campus_id]=' + campus + '&filter[pool_month]=' + month + '&filter[pool_year]=' + year + '&sort=login', headers=h)
def __setattr__(self, *_):
pass
CONST = CONST()
def campus_id(CAMPUS, MONTH, YEAR, *args):
campus = CAMPUS
if str.isnumeric(campus):
return(campus)
else:
print("Error : campus_id requiert un chiffre")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
def pool_month(CAMPUS, MONTH, YEAR, *args):
month = MONTH
if str.isalpha(month):
return(month)
else:
print("Error : pool_month requiert un mois en lettre")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
def pool_year(CAMPUS, MONTH, YEAR, *args):
year = YEAR
if str.isnumeric(year):
return(year)
else:
print("Error : pool_year requiert au moins un chiffre")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
def get_infos(CAMPUS, MONTH, YEAR, *args):
if len(argv) != 4:
print('Wrongs arguments !')
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
campus = campus_id(*argv[1:], *argv[2:], *argv[3:])
month = pool_month(*argv[1:], *argv[2:], *argv[3:])
year = pool_year(*argv[1:], *argv[2:], *argv[3:])
get_level = CONST.get_students
# print(json.dumps(get_level.json(),indent=4))
level = get_level.json()
tab = []
for item in level:
tab.append(item['login'])
chain = ' \
'.join(tab)
fichier = open("test.out","w")
fichier = open("test.out","a")
fichier.write(chain +"\
")
fichier.close()
if __name__ == '__main__':
test = get_infos(*argv[1:], *argv[2:], *argv[3:])
print(test)
for item in level:
tab.append(item['login'])
> api-test python3 api-test3.py"1""september""2017"
> api-test
> cat test.out
aaiche
abaille
abezanni
abouquet
acourtin
adfourca
adpusel
# ... earlier stuff
h = {'Authorization': 'Bearer ' + token}
params = {
'filter[primary_campus_id]': campus,
'filter[pool_month]': month,
'filter[pool_year]': year,
'sort': 'login'
}
get_students = requests.get('https://api.intra.xxx.fr/v2/cursus/1/users', params=params, headers=h)
# and so on ...
params = { ... } # as before
for page_idx in range(number_of_pages):
params['page'] = page_idx
get_students_page = requests.get(...)
# combine the outputs somehow, but that depends on the response
# and so on...
要特别回答您的分页问题,??您需要在 params 字典中添加一个额外的 page 参数。所以你可以添加一个循环
import requests
import json
import sys
from sys import argv
from operator import itemgetter, attrgetter
import json
import os
UID = 'REDACTED_USER_ID'
SECRET = 'REDACTED_SECRET_KEY'
class CONST(object):
try:
campus = sys.argv[1]
month = sys.argv[2]
year = sys.argv[3]
except:
print("Error : aucun argument")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
d = {'grant_type': 'client_credentials', 'client_id': UID, 'client_secret': SECRET}
r = requests.post("https://api.intra.xxx.fr/oauth/token", data=d)
token = r.json()['access_token']
print(token)
h = {'Authorization': 'Bearer ' + token}
get_students = requests.get('https://api.intra.xxx.fr/v2/cursus/1/users?filter[primary_campus_id]=' + campus + '&filter[pool_month]=' + month + '&filter[pool_year]=' + year + '&sort=login', headers=h)
def __setattr__(self, *_):
pass
CONST = CONST()
def campus_id(CAMPUS, MONTH, YEAR, *args):
campus = CAMPUS
if str.isnumeric(campus):
return(campus)
else:
print("Error : campus_id requiert un chiffre")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
def pool_month(CAMPUS, MONTH, YEAR, *args):
month = MONTH
if str.isalpha(month):
return(month)
else:
print("Error : pool_month requiert un mois en lettre")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
def pool_year(CAMPUS, MONTH, YEAR, *args):
year = YEAR
if str.isnumeric(year):
return(year)
else:
print("Error : pool_year requiert au moins un chiffre")
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
def get_infos(CAMPUS, MONTH, YEAR, *args):
if len(argv) != 4:
print('Wrongs arguments !')
print('Usage:"campus_id" (EX:1)"month" (july)"year" (2016)')
exit()
campus = campus_id(*argv[1:], *argv[2:], *argv[3:])
month = pool_month(*argv[1:], *argv[2:], *argv[3:])
year = pool_year(*argv[1:], *argv[2:], *argv[3:])
get_level = CONST.get_students
# print(json.dumps(get_level.json(),indent=4))
level = get_level.json()
tab = []
for item in level:
tab.append(item['login'])
chain = ' \
'.join(tab)
fichier = open("test.out","w")
fichier = open("test.out","a")
fichier.write(chain +"\
")
fichier.close()
if __name__ == '__main__':
test = get_infos(*argv[1:], *argv[2:], *argv[3:])
print(test)
for item in level:
tab.append(item['login'])
> api-test python3 api-test3.py"1""september""2017"
> api-test
> cat test.out
aaiche
abaille
abezanni
abouquet
acourtin
adfourca
adpusel
# ... earlier stuff
h = {'Authorization': 'Bearer ' + token}
params = {
'filter[primary_campus_id]': campus,
'filter[pool_month]': month,
'filter[pool_year]': year,
'sort': 'login'
}
get_students = requests.get('https://api.intra.xxx.fr/v2/cursus/1/users', params=params, headers=h)
# and so on ...
params = { ... } # as before
for page_idx in range(number_of_pages):
params['page'] = page_idx
get_students_page = requests.get(...)
# combine the outputs somehow, but that depends on the response
# and so on...