博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
实现数据同步——通过 POST、GET 读写码云(Gitee)私有仓库
阅读量:5087 次
发布时间:2019-06-13

本文共 5030 字,大约阅读时间需要 16 分钟。

# -*- coding: utf-8 -*-
"""Synchronise the local ``product`` table through a file in a Gitee repository.

``post`` reads every row of the ``product`` table and submits the rows, as
text, through Gitee's web edit form for ``test.py``.  ``get`` empties the local
table, downloads the rendered ``test.py`` page, parses the row list back out
of it and hands each row to ``sql_handle``.
"""
import ast
import sqlite3
import time

import requests
from bs4 import BeautifulSoup

from demo_handle import sql_handle

DB_PATH = "../db/record_price.db"
EDIT_URL = "https://gitee.com/harmony_creation/quotationSystem/edit/master/test.py"
BLOB_URL = "https://gitee.com/harmony_creation/quotationSystem/blob/master/test.py"
# Seconds to wait for Gitee before giving up; without a timeout ``requests``
# may block forever on a stalled connection.
REQUEST_TIMEOUT = 30


class post:
    """Prepare and submit the local ``product`` rows to the Gitee edit form.

    Constructing an instance performs all the preparation: it reads the
    table, fetches the edit page to obtain the response cookies and the
    ``authenticity_token`` form field, and builds the form payload in
    ``self.data``.  ``post_data`` then sends that payload.

    Raises:
        RuntimeError: if the edit page contains no ``authenticity_token``
            input (typically because the session cookie is no longer valid).
    """

    def __init__(self):
        # NOTE(review): the Cookie below is a hard-coded browser session
        # captured from the developer tools.  It is a credential and it
        # expires; consider loading it from configuration instead.
        self.header = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Connection": "keep-alive",
            "Cookie": "user_locale=zh-CN; oschina_new_user=false; remote_way=http; aliyungf_tc=AQAAAK1x2x/QlgcAbjNye8bI5D9bzrkd; tz=Asia%2FShanghai; Hm_lvt_24f17767262929947cc3631f99bfd274=1543897444,1544083477,1544087096,1544145347; Hm_lpvt_24f17767262929947cc3631f99bfd274=1544151547; gitee-session-n=BAh7C0kiD3Nlc3Npb25faWQGOgZFVEkiJWY2NGViMDQ2NmQ2YzY5MGJmNDkwNDUwNDliNmFiNzQ0BjsAVEkiF21vYnlsZXR0ZV9vdmVycmlkZQY7AEY6CG5pbEkiGXdhcmRlbi51c2VyLnVzZXIua2V5BjsAVFsHWwZpA%2BebI0kiIiQyYSQxMCRjcmsvNGYxODNXSEMvYXo1emJHYk9PBjsAVEkiHXdhcmRlbi51c2VyLnVzZXIuc2Vzc2lvbgY7AFR7BkkiFGxhc3RfcmVxdWVzdF9hdAY7AFRJdToJVGltZQ3irB3Av9CU7Ak6DW5hbm9fbnVtaQIxAToNbmFub19kZW5pBjoNc3VibWljcm8iBzBQOgl6b25lSSIIVVRDBjsAVEkiF2FjdGl2ZV9lbWFpbF9ndWlkZQY7AEZGSSIQX2NzcmZfdG9rZW4GOwBGSSIxUzg5MDE1MFUzMVduK0IvY29FenZ6WG00TnJ0aUNEZm0yVjl2TFFLRzkxaz0GOwBG--9fc346f1c7abce3693c1480171a6914ac7a5aaff",
            "Host": "gitee.com",
            "Referer": "https://gitee.com/harmony_creation/quotationSystem/blob/master/test.py",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
        }

        # Read the whole table, and release the connection even if the
        # query fails.
        self.conn = sqlite3.connect(DB_PATH)
        try:
            self.cursor = self.conn.cursor()
            rows = self.cursor.execute("select * from product").fetchall()
        finally:
            self.conn.close()

        # The edit page supplies both the cookies and the CSRF token that
        # the form submission must carry.
        result = requests.get(EDIT_URL, headers=self.header,
                              timeout=REQUEST_TIMEOUT)
        self.cookie = result.cookies.get_dict()
        page = BeautifulSoup(result.text, 'html.parser')
        token_input = page.find(name='input',
                                attrs={'name': "authenticity_token"})
        if token_input is None:
            raise RuntimeError(
                "authenticity_token not found on the Gitee edit page "
                "(HTTP %s); the session cookie may have expired"
                % result.status_code
            )
        self.token = token_input.get('value')

        self.data = {
            "utf8": "✓",
            "_method": "put",
            "authenticity_token": self.token,
            "commit_message_header": "更新 test.py",
            "extended_information": "",
            "last_commit": "",
            "eol_crlf": "false",
            # The rows travel as the textual form of a list of tuples;
            # ``get.get_data`` parses that same text back into rows.
            "content": str(rows)
        }

    def post_data(self):
        """Submit the prepared form to Gitee and print the HTTP status code."""
        result = requests.post(EDIT_URL, data=self.data, cookies=self.cookie,
                               timeout=REQUEST_TIMEOUT)
        print(result.status_code)


class get:
    """Replace the local ``product`` rows with the ones stored on Gitee.

    Constructing an instance opens the database and immediately deletes
    every row of ``product``; ``get_data`` then downloads and stores the
    remote rows and closes the connection.
    """

    def __init__(self):
        # NOTE(review): hard-coded browser session cookie, see ``post``.
        self.header = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Cookie": "user_locale=zh-CN; oschina_new_user=false; remote_way=http; aliyungf_tc=AQAAADjFJiu/5wQA8kTEb8iDuTsmpAD3; tz=Asia%2FShanghai; Hm_lvt_24f17767262929947cc3631f99bfd274=1544153817,1544415700,1544491905,1544497469; Hm_lpvt_24f17767262929947cc3631f99bfd274=1544498693; gitee-session-n=BAh7DEkiD3Nlc3Npb25faWQGOgZFVEkiJThlM2YwM2NiNzE4MDhlNTkzNTAxOTRhYTQ3ZWI2NjA0BjsAVEkiF21vYnlsZXR0ZV9vdmVycmlkZQY7AEY6CG5pbEkiGXdhcmRlbi51c2VyLnVzZXIua2V5BjsAVFsHWwZpA%2BebI0kiIiQyYSQxMCRjcmsvNGYxODNXSEMvYXo1emJHYk9PBjsAVEkiHXdhcmRlbi51c2VyLnVzZXIuc2Vzc2lvbgY7AFR7BkkiFGxhc3RfcmVxdWVzdF9hdAY7AFRJdToJVGltZQ1jrR3AO0%2B5ZAk6DW5hbm9fbnVtaQJ5AzoNbmFub19kZW5pBjoNc3VibWljcm8iB4iQOgl6b25lSSIIVVRDBjsAVEkiF2FjdGl2ZV9lbWFpbF9ndWlkZQY7AEZGSSIQcGhvbmVfZ3VpZGUGOwBGVEkiEF9jc3JmX3Rva2VuBjsARkkiMVBBNzAwczJuNWo3aW5UOUJkekt1ekxxK2VLKzdlQklmeVJhaFllVlpKRDA9BjsARg%3D%3D--304d5a4a63f6a1b1d0980d281b6a789353be8e3e",
            "Host": "gitee.com",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
        }
        self.conn = sqlite3.connect(DB_PATH)
        self.cursor = self.conn.cursor()
        # NOTE(review): the table is emptied here, before anything has been
        # downloaded.  If ``get_data`` later fails, the local rows are
        # already gone; consider deleting only after a successful download.
        self.cursor.execute("delete from product")
        self.conn.commit()

    def get_data(self):
        """Download the remote rows and pass each one to ``sql_handle``.

        Prints the HTTP status code of the download.  The connection opened
        by ``__init__`` is always closed when this method returns or raises.

        Raises:
            RuntimeError: if the page has no element with class ``line``.
            ValueError, SyntaxError: if the first such element does not
                contain a plain Python literal.
        """
        try:
            result = requests.get(BLOB_URL, headers=self.header,
                                  timeout=REQUEST_TIMEOUT)
            print(result.status_code)
            page = BeautifulSoup(result.text, "html.parser")
            code_lines = page.select(".line")
            if not code_lines:
                raise RuntimeError(
                    "no '.line' element found on the Gitee blob page "
                    "(HTTP %s); the session cookie may have expired"
                    % result.status_code
                )
            # SECURITY: this text comes from a web page, so it must never be
            # executed.  ``ast.literal_eval`` accepts only Python literals,
            # which is all that ``post`` uploads.  The text is stripped
            # because older Python versions reject leading whitespace.
            data_list = ast.literal_eval(code_lines[0].text.strip())
            for row in data_list:
                # presumably stores one row in ``product`` — confirm
                # against demo_handle.sql_handle
                sql_handle.sqlit_handle().edit_add(row)
            self.conn.commit()
        finally:
            self.conn.close()


# Example usage (left disabled, as in the original script):
# post().post_data()
# import time
# time.sleep(5)
# get().get_data()

当爬虫的递归请求被服务器拒绝时,可以在浏览器中刷新需要爬取的页面,在开发者工具的 Network 面板里捕获对应的请求,然后把其中的 Request Headers 全部复制到代码的请求头中即可。

转载于:https://www.cnblogs.com/cjj-zyj/p/10107377.html

你可能感兴趣的文章
分布式系统介绍
查看>>
Linq的Join == 两个foreach
查看>>
python小白-day15 web框架
查看>>
JVM 垃圾回收机制(1)
查看>>
JAVA转C#开发笔记
查看>>
AngularJs从数据库获取数据并显示
查看>>
JQuery加载html网页
查看>>
ES6的let命令实现猜想
查看>>
VM模板引擎语法
查看>>
UpdatePanel上使用FileUpload上传文件
查看>>
[工具] Altova UModel® 2017 is a UML tool for software modeling & application development.
查看>>
plsql连接Oracle11g 64位数据库导出dmp文件一闪而过
查看>>
易观算法大赛心得
查看>>
公共子序列与公共子串问题
查看>>
Hadoop1.x与2.x安装笔记
查看>>
1029: [JSOI2007]建筑抢修 - BZOJ
查看>>
redis与mysql的比较
查看>>
关于几种编程语言的介绍————Java、Python等
查看>>
windows phone开发-windows azure mobile service使用入门
查看>>
gis 参照资料
查看>>