{"id":259,"date":"2021-03-20T08:08:33","date_gmt":"2021-03-20T00:08:33","guid":{"rendered":"http:\/\/blog.yuekegu.com\/?p=259"},"modified":"2021-03-20T08:08:33","modified_gmt":"2021-03-20T00:08:33","slug":"python%e7%88%ac%e5%8f%96%e8%af%95%e9%a2%98%e4%bf%a1%e6%81%af-%e4%bf%9d%e5%ad%98%e6%96%87%e6%9c%ac%e5%b9%b6%e5%88%a9%e7%94%a8%e6%ad%a3%e5%88%99%e8%a1%a8%e8%be%be%e5%bc%8f%e8%8e%b7%e5%8f%96%e6%8c%87","status":"publish","type":"post","link":"https:\/\/book.yuekegu.com\/index.php\/2021\/03\/20\/python%e7%88%ac%e5%8f%96%e8%af%95%e9%a2%98%e4%bf%a1%e6%81%af-%e4%bf%9d%e5%ad%98%e6%96%87%e6%9c%ac%e5%b9%b6%e5%88%a9%e7%94%a8%e6%ad%a3%e5%88%99%e8%a1%a8%e8%be%be%e5%bc%8f%e8%8e%b7%e5%8f%96%e6%8c%87\/","title":{"rendered":"python\u722c\u53d6\u8bd5\u9898\u4fe1\u606f-\u4fdd\u5b58\u6587\u672c\u5e76\u5229\u7528\u6b63\u5219\u8868\u8fbe\u5f0f\u83b7\u53d6\u6307\u5b9a\u5b57\u6bb5\u5e76\u4fdd\u5b58mysql\u6570\u636e\u5e93"},"content":{"rendered":"\n<p> \u539f\u6587\u94fe\u63a5\uff1ahttps:\/\/blog.csdn.net\/weixin_44648900\/article\/details\/105196981<\/p>\n\n\n\n<p>\u75ab\u60c5\u671f\u95f4\u65e0\u804a\u7a81\u53d1\u5947\u60f3\u60f3\u8981\u505a\u4e00\u4e2a\u5728\u7ebf\u8003\u8bd5\u7cfb\u7edf\uff0c\u76ee\u524d\u5df2\u5b8c\u6210\u6570\u636e\u5e93\u8bbe\u8ba1\uff0c\u5f00\u59cb\u7f16\u5199\u722c\u866b\u722c\u53d6\u8bd5\u9898\u6570\u636e\uff0c\u76ee\u6807\u7f51\u7ad9\u5982\u4e0b\uff0c\u83b7\u53d6\u5185\u5bb9\u5305\u62ec\uff1a\u8003\u70b9\uff0c\u8bd5\u9898\uff0c\u7b54\u6848\u9009\u9879\uff0c\u7b54\u6848\uff0c\u89e3\u6790\u3002\u8003\u70b9\u5b57\u6bb5\u7684\u83b7\u53d6\u4fbf\u4e8e\u4ee5\u540e\u7cfb\u7edf\u4e2a\u6027\u5316\u63a8\u8350\u7684\u9700\u8981\u3002\ud83d\ude94<\/p>\n\n\n\n<figure class=\"wp-block-image\"><img decoding=\"async\" src=\"https:\/\/img-blog.csdnimg.cn\/20200330132423818.png?x-oss-process=image\/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80NDY0ODkwMA==,size_16,color_FFFFFF,t_70#pic_center\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\"\/><\/figure>\n\n\n\n<figure class=\"wp-block-image\"><img decoding=\"async\" src=\"https:\/\/img-blog.csdnimg.cn\/2020033013245369.png?x-oss-process=image\/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80NDY0ODkwMA==,size_16,color_FFFFFF,t_70#pic_center\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\"\/><\/figure>\n\n\n\n<h2 class=\"wp-block-heading\">\u53bb\u9664\u7f51\u9875\u91cc\u83b7\u53d6\u65f6\u5019\u9047\u5230\u7684\u810f\u6570\u636e<\/h2>\n\n\n\n<p>\n\n\u67e5\u770b\u7f51\u9875\u7684\u65f6\u5019\u53d1\u73b0\u8fd9\u4e2a\u4e1c\u897f\uff0c\u53ef\u80fd\u662f\u4ed6\u4eec\u540e\u53f0\u6709\u5176\u4ed6\u7528\u9014\uff0c\u7531\u4e8e\u76f4\u63a5\u5339\u914d\u5b57\u6bb5\u4e0d\u65b9\u4fbf\uff0c\u5148\u628a\u6240\u6709\u7f51\u9875\u6587\u672c\u5148\u83b7\u53d6\u518d\u628aclass\u4e3athis_jammer\u7b49\u4e2d\u5185\u5bb9\u83b7\u53d6\u4e3a\u505c\u7528\u8bcd\u8868\uff0c\u722c\u53d6\u7684\u8bd5\u9898\u6587\u672c\u53bb\u6389\u8fd9\u4e9b\u810f\u6570\u636e\u5c31OK\u4e86\u3002\n\n<\/p>\n\n\n\n<figure class=\"wp-block-image\"><img decoding=\"async\" src=\"https:\/\/img-blog.csdnimg.cn\/20200330134519376.png?x-oss-process=image\/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80NDY0ODkwMA==,size_16,color_FFFFFF,t_70#pic_center\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\"\/><\/figure>\n\n\n\n<p>\u4e0b\u9762\u4fdd\u5b58\u810f\u6570\u636e\u8868\u7684\u51fd\u6570<\/p>\n\n\n\n<figure class=\"wp-block-image\"><img decoding=\"async\" src=\"https:\/\/img-blog.csdnimg.cn\/20200330135049494.png?x-oss-process=image\/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80NDY0ODkwMA==,size_16,color_FFFFFF,t_70#pic_center\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\"\/><\/figure>\n\n\n\n<pre class=\"wp-block-code\"><code>#-*-coding:utf-8-*-\nimport requests\nfrom bs4 import BeautifulSoup\n# import codecs\ndef get_url(target_url, server, headers):\n    req = requests.get(target_url, headers=headers)\n    bf = BeautifulSoup(req.text)\n    div = bf.find_all('div', class_='questions_col')\n    a_bf = BeautifulSoup(str(div[0]))\n    a = a_bf.find_all('a')\n    cheak_parsing_url = []\n    for each in a:\n        if each.string == \"\u67e5\u770b\u89e3\u6790\":\n            full_url = server + each.get('href')\n            cheak_parsing_url.append(full_url)\n    print(cheak_parsing_url)\n    return cheak_parsing_url\n\ndef change_page(target_url, server, headers):\n    req = requests.get(target_url, headers=headers)\n    bf = BeautifulSoup(req.text)\n    div = bf.find_all('div', class_='fenye')\n    a_bf = BeautifulSoup(str(div[0]))\n    a = a_bf.find_all('a')\n    full_url = None\n    for each in a:\n        if each.string == \"\u4e0b\u4e00\u9875\":\n            full_url = server + each.get('href')\n            print(full_url)\n        else :\n            continue\n    return full_url\n\ndef get_html(url_list, file_path, headers):\n    for url in url_list:\n        req = requests.get(url, headers=headers)\n        content = req.content.decode('utf-8','ignore')\n        bf = BeautifulSoup(content, fromEncoding=\"gb18030\")\n        del_text = bf.find_all(class_=[\"this_jammer\", \"hidejammersa\", \"jammerd42\"])\n        for i in del_text:\n            if i:\n                new_tag = \"\"\n                try:\n                    i.string.replace_with(new_tag)\n                except:\n                    pass\n        texts = bf.find_all('div', class_= 'answer_detail')\n        try:\n            texts = texts[0].text.replace('\\xa0', '')\n            texts = texts.replace(\" \", \"\")\n        except:\n            pass\n        try:\n            texts = texts.replace(\"\\n\", '')\n        except:\n            pass\n        print(texts)\n        contents_save(file_path, texts)\n\ndef contents_save(file_path, content):\n    \"\"\"\n    :param file_path: \u722c\u53d6\u6587\u4ef6\u4fdd\u5b58\u8def\u5f84\n    :param content: \u722c\u53d6\u6587\u672c\u6587\u4ef6\u5185\u5bb9\n    :return: None\n    \"\"\"\n    with open(file_path, 'a', encoding=\"utf-8\", errors='ignore') as f:\n        try:\n            f.write(content)\n        except:\n            pass\n        f.write('\\n')\n\ndef get_category(target_url, server, headers):\n    req = requests.get(target_url, headers=headers)\n    bf = BeautifulSoup(req.text)\n    div = bf.find_all('div', class_='shiti_catagory frame')\n    a_bf = BeautifulSoup(str(div[0]))\n    a = a_bf.find_all('a')\n    category = []\n    for each in a:\n        full_url = server + each.get('href')\n        category.append(full_url)\n    print(category)\n    return category\n\nif __name__ == \"__main__\":\n    main_url = \"https:\/\/tiku.21cnjy.com\/tiku.php?mod=quest&amp;channel=8&amp;xd=3\"\n    server = \"https:\/\/tiku.21cnjy.com\/\"\n    save_dir = \"\/Users\/lidongliang\/Desktop\/\u722c\u866b\/data\"\n    subject_file = \"1.txt\"\n    file_path = save_dir + '\/' + subject_file\n    headers = {\n        'User-Agent': 'Mozilla\/5.0 (Macintosh; Intel Mac OS X 10_15_1) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/80.0.3987.132 Safari\/537.36',\n        'Accept-Encoding': 'gzip'}\n    categorys = get_category(main_url, server, headers)\n    for category_url in categorys:\n        counting = 0\n        target_url = category_url\n        while counting &lt; 100:\n            cheak_parsing_url = get_url(target_url, server, headers)\n            get_html(cheak_parsing_url, file_path, headers)\n            target_url = change_page(target_url, server, headers)\n\n            if target_url == None:\n                break\n            counting += 1\n<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">\u8fd0\u884c\u521d\u6b65\u7ed3\u679c\u5982\u4e0b\uff1a<\/h2>\n\n\n\n<figure class=\"wp-block-image\"><img decoding=\"async\" src=\"https:\/\/img-blog.csdnimg.cn\/20200330140337210.png?x-oss-process=image\/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80NDY0ODkwMA==,size_16,color_FFFFFF,t_70#pic_center\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\"\/><\/figure>\n\n\n\n<h2 class=\"wp-block-heading\">\u5bf9\u6587\u672c\u8fdb\u884c\u6b63\u5219\u5339\u914d\u83b7\u53d6\u6587\u672c\u6307\u5b9a\u5b57\u6bb5\u5e76\u4fdd\u5b58\u5230\u6570\u636e\u5e93<\/h2>\n\n\n\n<pre class=\"wp-block-code\"><code>import re\nimport pymysql\n\nw1 = 'A\uff0e'\nw2 = 'B\uff0e'\nw3 = 'C\uff0e'\nw4 = 'D\uff0e'\nw5 = '\u7b54\u6848'\nw6 = '\u89e3\u6790\u8bd5\u9898\u5206\u6790\uff1a'\nw7 = '\u8003\u70b9'\n\n\ndef get_txt():\n    with open(\"\/Users\/lidongliang\/Desktop\/\u722c\u866b\/data\/1.txt\", \"r\") as f:\n        txt = f.readlines()\n        return txt\n\n\ndef fen(txt):\n    # buff = txt.replace('\\n','')\n    timu = re.compile('^' + '(.*?)' + w1, re.S).findall(txt)\n    A = re.compile(w1 + '(.*?)' + w2, re.S).findall(txt)\n    B = re.compile(w2 + '(.*?)' + w3, re.S).findall(txt)\n    C = re.compile(w3 + '(.*?)' + w4, re.S).findall(txt)\n    D = re.compile(w4 + '(.*?)' + w5, re.S).findall(txt)\n    daan = re.compile(w5 + '(.*?)' + w6, re.S).findall(txt)\n    jiexi = re.compile(w6 + '(.*?)' + w7, re.S).findall(txt)\n    kaodian = re.compile(w7 + '(.*?)' + '\\Z', re.S).findall(txt)\n\n    timu.extend(A)\n    timu.extend(B)\n    timu.extend(C)\n    timu.extend(D)\n    timu.extend(daan)\n    timu.extend(jiexi)\n    timu.extend(kaodian)\n\n    # print(timu)\n\n    try:\n        tg = timu[0]\n        xx = (\"A:\" + timu[1] + \"B:\" + timu[2] + \"C:\" + timu[3] + \"D:\" + timu[4])\n        da = timu[5]\n        fx = timu[6]\n        kd = timu[7]\n    except:\n        tg = '1'\n        xx = '1'\n        da = '1'\n        fx = '1'\n        kd = '1'\n    con = pymysql.connect(host='localhost', user='root', passwd='00000000', db='login_test_1', charset='utf8')\n    cursor = con.cursor()\n    sql = \"insert into question_info(tg,xx,da,fx,kd) values('\n          \n    cursor.execute(sql)\n    con.commit()\n\n\nif __name__ == \"__main__\":\n    txt = get_txt()\n    for i in txt:\n        fen(i)\n    print(\"done\")\n<\/code><\/pre>\n\n\n\n<p>\u6700\u540e\u7ed3\u679c\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image\"><img decoding=\"async\" src=\"https:\/\/img-blog.csdnimg.cn\/20200330140313615.png?x-oss-process=image\/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80NDY0ODkwMA==,size_16,color_FFFFFF,t_70#pic_center\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\"\/><\/figure>\n","protected":false},"excerpt":{"rendered":"<p>\u539f\u6587\u94fe\u63a5\uff1ahttps:\/\/blog.csdn.net\/weixin_44648900\/article\/details\/105196981 \u75ab\u60c5\u671f\u95f4\u65e0\u804a\u7a81\u53d1\u5947\u60f3\u60f3\u8981\u505a\u4e00\u4e2a\u5728\u7ebf\u8003\u8bd5\u7cfb\u7edf\uff0c\u76ee\u524d\u5df2\u5b8c\u6210\u6570\u636e\u5e93\u8bbe\u8ba1\uff0c\u5f00\u59cb\u7f16\u5199\u722c\u866b\u722c\u53d6\u8bd5\u9898\u6570\u636e\uff0c\u76ee\u6807\u7f51\u7ad9\u5982\u4e0b\uff0c\u83b7\u53d6\u5185\u5bb9\u5305\u62ec\uff1a\u8003\u70b9\uff0c\u8bd5\u9898\uff0c\u7b54\u6848\u9009\u9879\uff0c\u7b54\u6848\uff0c\u89e3\u6790\u3002\u8003\u70b9\u5b57\u6bb5\u7684\u83b7\u53d6\u4fbf\u4e8e\u4ee5\u540e\u7cfb\u7edf\u4e2a\u6027\u5316\u63a8\u8350\u7684\u9700\u8981\u3002\ud83d\ude94 \u53bb\u9664\u7f51\u9875\u91cc\u83b7\u53d6\u65f6\u5019\u9047\u5230\u7684\u810f\u6570\u636e \u67e5\u770b\u7f51\u9875\u7684\u65f6\u5019\u53d1\u73b0\u8fd9\u4e2a\u4e1c\u897f\uff0c\u53ef\u80fd\u662f\u4ed6\u4eec\u540e\u53f0\u6709\u5176\u4ed6\u7528\u9014\uff0c\u7531\u4e8e\u76f4\u63a5\u5339\u914d\u5b57\u6bb5\u4e0d\u65b9\u4fbf\uff0c\u5148\u628a\u6240\u6709\u7f51\u9875\u6587\u672c\u5148\u83b7\u53d6\u518d\u628aclass\u4e3athis_jammer\u7b49\u4e2d\u5185\u5bb9\u83b7\u53d6\u4e3a\u505c\u7528\u8bcd\u8868\uff0c\u722c\u53d6\u7684\u8bd5\u9898\u6587\u672c\u53bb\u6389\u8fd9\u4e9b\u810f\u6570\u636e\u5c31OK\u4e86\u3002 \u4e0b\u9762\u4fdd\u5b58\u810f\u6570\u636e\u8868\u7684\u51fd\u6570 \u8fd0\u884c\u521d\u6b65\u7ed3\u679c\u5982\u4e0b\uff1a \u5bf9\u6587\u672c\u8fdb\u884c\u6b63\u5219\u5339\u914d\u83b7\u53d6\u6587\u672c\u6307\u5b9a\u5b57\u6bb5\u5e76\u4fdd\u5b58\u5230\u6570\u636e\u5e93 \u6700\u540e\u7ed3\u679c\uff1a<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[13],"tags":[],"class_list":["post-259","post","type-post","status-publish","format-standard","hentry","category-python"],"_links":{"self":[{"href":"https:\/\/book.yuekegu.com\/index.php\/wp-json\/wp\/v2\/posts\/259","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/book.yuekegu.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/book.yuekegu.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/book.yuekegu.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/book.yuekegu.com\/index.php\/wp-json\/wp\/v2\/comments?post=259"}],"version-history":[{"count":0,"href":"https:\/\/book.yuekegu.com\/index.php\/wp-json\/wp\/v2\/posts\/259\/revisions"}],"wp:attachment":[{"href":"https:\/\/book.yuekegu.com\/index.php\/wp-json\/wp\/v2\/media?parent=259"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/book.yuekegu.com\/index.php\/wp-json\/wp\/v2\/categories?post=259"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/book.yuekegu.com\/index.php\/wp-json\/wp\/v2\/tags?post=259"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}