# news.py として保存し、コマンドラインから次のように実行する:
#   news.py > news.html
# search() のオプションは https://python-googlesearch.readthedocs.io/en/latest/ を参照
#pip install chardet
#pip install requests
#pip install lxml
#pip install google
#SERVER INSTALL
#pip install cchardet
# -*- coding: utf-8 -*-
import datetime
import html
import io
import sys

import cchardet
import requests
from bs4 import BeautifulSoup
from googlesearch import search
# Force UTF-8 on standard output regardless of the console/locale encoding:
# the page printed by this script declares charset=UTF-8.
if hasattr(sys.stdout, "reconfigure"):
    # Switch the existing stream in place. Stacking a second TextIOWrapper on
    # the same buffer leaves two owners of one buffer; whichever is discarded
    # first closes it for the other.
    sys.stdout.reconfigure(encoding="utf-8")
elif hasattr(sys.stdout, "buffer"):
    # Older interpreters without reconfigure(): wrap the raw byte stream.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf_8')

# Captured once at import time; main() prints them into the page
# (date only for the <meta name="date"> tag, full timestamp for the body).
date_time0 = datetime.date.today()
date_time1 = datetime.datetime.today()
# Static HTML fragments that main() prints, in order, around the search
# results. Names are part of the module interface; do not rename.

# --- document start and <head> contents ---
StartTag = """<!doctype html>\n<html>\n<head>"""
code = """<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />"""
css = """<meta http-equiv="Content-style-Type" content="text/css" />"""
java = """<meta http-equiv="Content-Script-Type" content="text/javascript" />"""
title_in = """<title>ON THE KUMO NEWS FROM NARA</title>"""
author = """<meta name="author" content="橋本 健二" />"""
# The keyword is quoted with &quot; because a raw '"' inside the
# double-quoted content attribute would terminate the attribute early.
description = """<meta name="description" content="python pip google で 5:00と17:00にキーワード&quot;奈良&quot; で取得したニュースを配信します。" />"""
keywords = """<meta name="keywords" content="NARA,nara,奈良,TENRI,tenri,天理,NEWS,news,ニュース,ON THE KUMO" />"""
generator = """<meta name="generator" content="notepad.exe,Terapad" />"""
robots = """<meta name="robots" content="index" />"""
style = """<link type="text/css" media="all" rel="stylesheet" href="../../css/news.css" />"""
# Loaded over https so the script is not blocked as mixed content when the
# page itself is served over https (https also works from http pages).
jquery = """<script src="https://code.jquery.com/jquery-3.2.0.min.js"></script>"""
imgG = """<script type="text/javascript" src="../../JS/imgGuard.js"></script>"""
Endhead = """</head>"""

# --- <body> framing ---
body = """<body>"""
# Header image; its id "topnews" is the target of the footer link below.
img = """<a href="../../index.html"><img id="topnews" class="imgGuard" title="Top of KUMO" rel="noindex" src="../../images/news.png" alt="NewsImage" /></a>"""
# Footer image linking back to the top of the page.
btomImg = """<a href="#topnews"><img id="news" class="imgGuard" rel="noindex" src="../../images/pattern20190701.png" alt="pattern" /></a>"""
Endbody = """</body>\n</html>"""
codelink = """<a id="pythoncode" class="PythonCode" href="http://kumo.site/Document/HTML/NewsCode.html"><center>-----python code-----</center></a>"""
# Placeholder titles printed when a real title cannot be shown.
_TITLE_UNKNOWN_ENCODING = "BRAKING CODE ON THE KUMO"   # charset detection failed
_TITLE_MISSING_OR_ERROR = "MAY BE NO TITLE OR ERROR"   # no <title>, or fetch failed

# Seconds to wait for a single article before giving up on it, so one
# unresponsive site cannot stall the whole run.
_REQUEST_TIMEOUT = 10

# Markup of one result entry (three output lines). The structure is kept
# exactly as the page has always emitted it; only the substituted values are
# escaped before insertion.
_ENTRY_TEMPLATE = (
    '<h3><p id="news_kumo_{i}" class="on_kumo_news_title">'
    '<span class="No">{i}</span>:{title}</p></h3><br>\n'
    '<p id="news_kumo_description{i}" class="on_kumo_news_description">'
    '{description}</p><br>\n'
    '<a title="No{i}" id="{i}" class="on_kumo_news" href="{url}">{url}</a><br>'
)


def _render_entry(i, url, title, description):
    """Return the HTML for result number *i* with all scraped text escaped."""
    return _ENTRY_TEMPLATE.format(
        i=i,
        title=html.escape(str(title)),
        description=html.escape(str(description)),
        url=html.escape(str(url)),
    )


def _extract_title_and_description(page_text):
    """Return (title, description) parsed from the HTML text of one page.

    A missing or empty <title> yields the "no title" placeholder; a missing
    description meta tag yields an empty string.
    """
    soup = BeautifulSoup(page_text, 'lxml')

    title_tag = soup.title
    # .string is None when <title> is absent or contains nested markup.
    raw_title = title_tag.string if title_tag is not None else None
    title = raw_title.strip() if raw_title else ""

    metas = soup.select('meta[name=description]')
    description = metas[0].get('content', '') if metas else ""

    return title or _TITLE_MISSING_OR_ERROR, description


def _fetch_title_and_description(url):
    """Download *url* and return its (title, description); never raises on
    network failure so that one bad link does not abort the whole page."""
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Report on stderr; stdout carries the generated HTML.
        print("fetch failed:", url, exc, file=sys.stderr)
        return _TITLE_MISSING_OR_ERROR, ""

    detected = cchardet.detect(response.content)["encoding"]
    if detected is None:
        # Without a known charset the decoded text cannot be trusted.
        return _TITLE_UNKNOWN_ENCODING, ""
    response.encoding = detected
    return _extract_title_and_description(response.text)


def google_search(query, limit=100):
    """Search Google for *query* and print one HTML entry per result.

    For each result URL the page is fetched, its <title> and
    <meta name="description"> are extracted, and three lines of HTML
    (title, description, link) are written to standard output.

    Args:
        query: Search keyword passed to googlesearch.search().
        limit: Maximum number of results to print (default 100).

    Returns:
        None. Output goes to standard output only.
    """
    if limit <= 0:
        return

    # tbs="qdr:d" and tpe="nws" are passed through unchanged; see the
    # googlesearch documentation for their meaning.
    # NOTE(review): lang="jp" is kept as-is; the usual language code for
    # Japanese is "ja" -- confirm which one is intended.
    results = search(query, lang="jp", tbs="qdr:d", safe='on', tpe='nws',
                     stop=max(150, limit))
    for i, url in enumerate(results, start=1):
        if i > limit:
            break
        title, description = _fetch_title_and_description(url)
        print(_render_entry(i, url, title, description))
#####イメージ検索#####
#def google_search_img(query, limit=10):
# for i, url in zip(range(10),search_images(query, lang="jp", num=limit, tbs="qdr:d", safe='off', tpe='nws', only_standard='True')):
# print ('<a id="',i,'" class="on_kumo_news" href="',url,'">',url,'</a><br>',sep='')
def main():
    """Print the complete news page as HTML on standard output.

    Emits the <head> fragments, the page header with the generation
    timestamp and the explanatory notes, then the search results for the
    keyword "奈良", and finally the footer fragments.
    """
    # <head>: fixed fragments, with the generation date inserted after the
    # keywords meta tag.
    for fragment in (StartTag, code, css, java, title_in, author,
                     description, keywords):
        print(fragment)
    print('<meta name="date" content="', date_time0, '"/>', sep='')
    for fragment in (generator, robots, style, jquery, imgG, Endhead):
        print(fragment)

    # <body>: header image, timestamp and notes about the placeholder titles.
    print(body)
    print(img)
    # A normal start tag is used here: "<p ... />" followed by "</p>" is
    # self-closing syntax on a non-void element, which validators reject.
    print('<p id="news_date" class="on_kumo_news" title="day and time">',
          date_time1, '</p><br>', sep='')
    print('<h1 class="on_kumo_news_nara">検索ワードは<span id="Nara" style="color:#0000FF;">"奈良"</span></h1><br>')
    print('<p>タイトルに<b>"BRAKING CODE ON THE KUMO"</b>と表示される場合、文字コードが特定できてないことが考えられます。</p><br>')
    print('<p>タイトルに<b>"MAY BE NO TITLE OR ERROR"</b>が表示される場合タイトルがないか、エラーです。</p><br>')

    # Search results, one entry per hit.
    google_search("奈良")

    # Footer and document end.
    for fragment in (btomImg, codelink, Endbody):
        print(fragment)


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()