ON THE KUMO PYTHON IN GOOGLE NEWS SEARCH

news.py という名前で保存し、コマンドラインから次のように実行します。
python news.py > news.html
オプションは https://python-googlesearch.readthedocs.io/en/latest/ を参照してください。
#pip install chardet
#pip install requests
#pip install lxml
#pip install google
#SERVER INSTALL
#pip install cchardet


# -*- coding: utf-8 -*-

from googlesearch import search
from bs4 import BeautifulSoup
import cchardet
import requests
import datetime
import sys, io
import sys

# Re-wrap stdout so that everything printed is encoded as UTF-8; the generated
# page declares charset=UTF-8 in its <meta> tag, so the bytes must match.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf_8')

# Captured once at import time and printed by main(): the date goes into the
# <meta name="date"> tag, the full timestamp into the visible page body.
date_time0 = datetime.date.today()
date_time1 = datetime.datetime.today()

# Static HTML fragments of the generated page; main() prints them in order.

# --- document start and <head> contents ---
StartTag =    """<!doctype html>\n<html>\n<head>"""
code =        """<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />"""
css =         """<meta http-equiv="Content-style-Type" content="text/css" />"""
java =        """<meta http-equiv="Content-Script-Type" content="text/javascript" />"""
title_in =    """<title>ON THE KUMO NEWS FROM NARA</title>"""
author =      """<meta name="author" content="橋本 健二" />"""
# The quotes around the keyword are written as &quot; because a raw '"' inside
# content="..." would terminate the attribute value early.
description = """<meta name="description" content="python pip google で 5:00と17:00にキーワード&quot;奈良&quot; で取得したニュースを配信します。" />"""
keywords =    """<meta name="keywords" content="NARA,nara,奈良,TENRI,tenri,天理,NEWS,news,ニュース,ON THE KUMO" />"""
generator =   """<meta name="generator" content="notepad.exe,Terapad" />"""
robots =      """<meta name="robots" content="index" />"""
style =       """<link type="text/css" media="all" rel="stylesheet" href="../../css/news.css" />"""
# NOTE(review): jQuery is loaded over plain http; browsers may block it as
# mixed content if the page itself is served over https - confirm.
jquery =      """<script src="http://code.jquery.com/jquery-3.2.0.min.js"></script>"""
imgG =        """<script type="text/javascript" src="../../JS/imgGuard.js"></script>"""
Endhead =     """</head>"""

# --- <body> fragments ---
body =        """<body>"""
# Banner image at the top of the page; it carries the id="topnews" anchor.
img =         """<a href="../../index.html"><img id="topnews" class="imgGuard" title="Top of KUMO" rel="noindex" src="../../images/news.png" alt="NewsImage" /></a>"""
# Footer image that links back to the #topnews anchor.
btomImg =     """<a href="#topnews"><img id="news" class="imgGuard" rel="noindex" src="../../images/pattern20190701.png" alt="pattern" /></a>"""
Endbody =     """</body>\n</html>"""
# Link to the page that shows this script's source code.
codelink =    """<a id="pythoncode" class="PythonCode" href="http://kumo.site/Document/HTML/NewsCode.html"><center>-----python code-----</center></a>"""

# Maximum number of search hits rendered into the page.
_MAX_RESULTS = 100

# Seconds to wait for a single article before giving up on it.
_FETCH_TIMEOUT = 10

# Detected encodings that count as "identified"; a page without a usable
# title whose encoding is not in this set gets the "BRAKING CODE" placeholder.
_KNOWN_ENCODINGS = frozenset(
    ("UTF-8", "SHIFT_JIS", "ASCII", "EUC-JP", "EUC-JP UTF-8")
)

# Placeholder titles. main() prints a legend that quotes these exact strings,
# so they must not be changed here alone.
_TITLE_UNKNOWN_ENCODING = "BRAKING CODE ON THE KUMO"
_TITLE_MISSING = "MAY BE NO TITLE OR ERROR"


def _fetch_title_and_description(url):
    """Download *url* and return ``(title, description, encoding)``.

    ``title`` is the stripped text of the page's <title>, or None when the
    page has no usable title. ``description`` is the ``content`` of the first
    ``<meta name=description>`` tag, or "" when there is none. ``encoding`` is
    whatever cchardet detected for the raw bytes (may be None).

    Network and parser errors propagate to the caller.
    """
    response = requests.get(url, timeout=_FETCH_TIMEOUT)
    encoding = cchardet.detect(response.content)["encoding"]
    # Decode with the detected charset instead of the HTTP header's guess.
    response.encoding = encoding
    soup = BeautifulSoup(response.text, 'lxml')

    title = None
    if soup.title is not None and soup.title.string:
        title = soup.title.string.strip() or None

    description = ""
    metas = soup.select('meta[name=description]')
    if metas:
        description = metas[0].attrs.get('content', '')

    return title, description, encoding


def _fallback_title(encoding):
    """Pick the placeholder title for a page that yielded no <title> text."""
    if encoding is not None and str(encoding).upper() in _KNOWN_ENCODINGS:
        return _TITLE_MISSING
    return _TITLE_UNKNOWN_ENCODING


def _render_entry(number, title, description, url):
    """Return the three HTML lines for one search hit, joined by newlines.

    All scraped values are HTML-escaped: they come from arbitrary web pages
    and must not be able to inject markup into the generated page.
    """
    import html  # function-scope import: the file header does not import it

    safe_title = html.escape(str(title))
    safe_description = html.escape(str(description))
    safe_url = html.escape(str(url))
    return "\n".join((
        '<h3><p id="news_kumo_{0}" class="on_kumo_news_title">'
        '<span class="No">{0}</span>:{1}</p></h3><br>'.format(number, safe_title),
        '<p id="news_kumo_description{0}" class="on_kumo_news_description">'
        '{1}</p><br>'.format(number, safe_description),
        '<a title="No{0}" id="{0}" class="on_kumo_news" href="{1}">'
        '{1}</a><br>'.format(number, safe_url),
    ))


def google_search(query):
    """Search Google News for *query* and print one HTML entry per hit.

    For each of the first ``_MAX_RESULTS`` URLs yielded by ``search()`` the
    page is downloaded, its title and meta description are extracted, and a
    heading, a description paragraph and a link are printed to stdout.

    A page that cannot be fetched or parsed is still listed, with the
    placeholder title "MAY BE NO TITLE OR ERROR" and an empty description;
    the error itself is reported on stderr so it does not end up in the
    generated HTML. A page without a title gets one of the two placeholder
    titles depending on whether its encoding was identified.

    Args:
        query: Search keyword passed to ``googlesearch.search``.

    Returns:
        None. Output is written to stdout.
    """
    # NOTE(review): Google's language code for Japanese is normally "ja";
    # "jp" is kept unchanged here - confirm against the googlesearch docs.
    hits = search(query, lang="jp", tbs="qdr:d", safe='on', tpe='nws', stop=150)

    for number, url in enumerate(hits, start=1):
        if number > _MAX_RESULTS:
            break

        try:
            title, description, encoding = _fetch_title_and_description(url)
        except Exception as exc:
            # Per-item boundary: one unreachable or malformed page must not
            # abort the whole run, and must not reuse the previous page's data.
            print("news.py: could not read {}: {!r}".format(url, exc),
                  file=sys.stderr)
            title, description = _TITLE_MISSING, ""
        else:
            if title is None:
                title = _fallback_title(encoding)

        print(_render_entry(number, title, description, url))

##### Image search (disabled) #####
#def google_search_img(query, limit=10):
#    for i, url in zip(range(10),search_images(query, lang="jp", num=limit, tbs="qdr:d", safe='off', tpe='nws', only_standard='True')): 
#        print ('<a id="',i,'" class="on_kumo_news" href="',url,'">',url,'</a><br>',sep='')

def main():
    """Print the complete news page to stdout.

    Emits, in order: the document head (static fragments plus a
    ``<meta name="date">`` tag), the banner image, the generation timestamp,
    the heading and legend, the search results for the keyword "奈良", and
    finally the footer image, source-code link and closing tags.
    """
    head_parts = (
        StartTag,
        code,
        css,
        java,
        title_in,
        author,
        description,
        keywords,
        '<meta name="date" content="{}"/>'.format(date_time0),
        generator,
        robots,
        style,
        jquery,
        imgG,
        Endhead,
    )
    for part in head_parts:
        print(part)

    print(body)
    print(img)
    # The opening tag is a normal <p ...>: the original wrote it as
    # self-closing ("/>") even though text and a </p> follow.
    print('<p id="news_date" class="on_kumo_news" title="day and time">'
          '{}</p><br>'.format(date_time1))
    print('<h1 class="on_kumo_news_nara">検索ワードは<span id="Nara" style="color:#0000FF;">"奈良"</span></h1><br>')
    # Legend explaining the two placeholder titles google_search() can emit.
    print('<p>タイトルに<b>"BRAKING CODE ON THE KUMO"</b>と表示される場合、文字コードが特定できてないことが考えられます。</p><br>')
    print('<p>タイトルに<b>"MAY BE NO TITLE OR ERROR"</b>が表示される場合タイトルがないか、エラーです。</p><br>')

    google_search("奈良")

    for part in (btomImg, codelink, Endbody):
        print(part)

# Generate the page only when executed as a script, not when imported.
if __name__ == '__main__':
    main()