4 years ago · b2cafa72b5
--- a/pdetails.py
+++ b/pdetails.py
@@ -1,14 +1,16 @@
 
				 
			
 
				 import os
			
 
				 import sys
			
 
				+import requests
			
 
				 from os import path
			
 
				+from posixpath import join as posixjoin
			
 
				 from urllib import request
			
 
				 from qiniu import Auth, put_file, etag
			
 
				 
			
 
				 """
			
 
				 # 功能说明
			
 
				 1) 使用大仙批量下载产品图片后,需要手动修改主图,详情图,主要删除不合适的图片,保留下来的作为最终要呈现的产品资料
			
 
				-而这些经过处理后的图片位于硬盘中某个目录下,这个目录就是本脚本要处理的根目录.
			
 
				+这些经过处理后的图片位于硬盘中某个目录下,这个目录就是本脚本要处理的根目录.
			
 
				 2) 本脚本主要解析处理目录中所有的产品图片,解析,上传,逻辑如下:
			
 
				 a. 提取产品名称
			
 
				 b. 上传主图到七牛
			
@@ -16,7 +18,8 @@ c. 上传详情图到七牛
 
				 d. 生成详情页内容html格式文本
			
 
				 e. 提取产品价格
			
 
				 f. 把每个产品的以上信息输出到产品对应的目录
			
 
				-3) 脚本任务就完成了. 之后需要到后台处理.添加图片必须作为远程图片添加. 需要稍微编辑产品其他信息
			
 
				+g. 把图片的远程地址结合产品信息上传到后台服务器进行管理
			
 
				+3) 脚本任务就完成了. 之后需要到后台处理. 需要编辑产品其他信息
			
 
				 
			
 
				 # 脚本使用
			
 
				 cd /path/to/pdetails.py
			
@@ -24,8 +27,9 @@ python pdetails.py <root_dir>
 
				 
			
 
				 """
			
 
				 
			
 
				-_VERSION_ = '0.0.1'
			
 
				+_VERSION_ = '0.1.2'
			
 
				 _DATE_ = '2021-01-04'
			
 
				+_AUTHOR_ = 'wenstudio@asia.com'
			
 
				 
			
 
				 # 七牛配置
			
 
				 KEY_ACCESS = 'SneSBtnWLdStBhCx0O_QogNkXoRlKNOiv1--XMBB'
			
@@ -35,13 +39,20 @@ KEY_SECRET = 'GXMg-ENcp2UKYQWdeaf43tk_06NnMoA4OVFxdkYw'
 
				 BUCKET_NAME = 'twong'
			
 
				 BUCKET_DOMAIN = 'http://twongpic.shotshock.shop'
			
 
				 
			
 
				+TWONG = 'http://twong.shotshock.shop'
			
 
				+
			
 
				 # 产品字典, 从 csv 文件中解析, 用于查找价格
			
 
				 DICT_PROD = {}
			
 
				 
			
 
				+print('pdetails-v{}. 美天旺项目内部工具\n'.format(_VERSION_))
			
 
				 
			
 
				 def remote_img_exists(url):
			
 
				     """
			
 
				     检查远程文件是否已存在
			
 
				+
			
 
				+    @url: remote image url
			
 
				+
			
 
				+    @return: boolean.
			
 
				     """
			
 
				     try:
			
 
				         request.urlopen(url)
			
@@ -53,7 +64,11 @@ def remote_img_exists(url):
 
				 def parse_prod_root_name(name):
			
 
				     """
			
 
				     ROOT_DIR 下是一个个产品的资料目录, 解析这个目录名, 得到开头的数字串,作为上传到七牛后该产品的子路径
			
 
				-    返回 数字串,产品名 或 False, False
			
 
				+
			
 
				+    @name: path name
			
 
				+
			
 
				+    @return str, str.
			
 
				+        数字串,产品名 或 False, False
			
 
				     """
			
 
				     parts = name.split('_')
			
 
				     if len(parts) != 3:
			
@@ -63,49 +78,71 @@ def parse_prod_root_name(name):
 
				 
			
 
				 def parse_csv(csv):
			
 
				     """
			
 
				-    解析 csv 文件
			
 
				-    返回名称-价格字典
			
 
				+    @csv: csv file
			
 
				+
			
 
				+    @return: dict
			
 
				+        the most important columns in the csv file.
			
 
				     """
			
 
				     d = {}
			
 
				     with open(csv, 'rt', encoding='GBK') as h:
			
 
				+        # version line
			
 
				+        version_line = h.readline()
			
 
				+        parts = version_line.split(' ')
			
 
				+        if len(parts) != 2 or parts[1].lower().strip() != '1.00':
			
 
				+            print('ERROR: unsupported csv file version. please update this software.')
			
 
				+            exit(0)
			
 
				+        # english headers
			
 
				+        eng_headers = h.readline().split(',')
			
 
				+        cn_headers = h.readline().split(',')
			
 
				+        # products
			
 
				         while True:
			
 
				-            line = h.readline()
			
 
				-            if not line:
			
 
				+            values = h.readline()
			
 
				+            if not values:
			
 
				                 break
			
 
				-            parts = line.split(',')
			
 
				-            if len(parts) != 59:
			
 
				-                continue
			
 
				-            key = parts[0][1:-1]
			
 
				-            d[key] = parts[7]
			
 
				+            values = values.split(',')
			
 
				+            key = values[0].strip(' "')
			
 
				+            d[key] = {}
			
 
				+            # extract some columes
			
 
				+            for x in [7,9,10,11]:
			
 
				+                d[key][cn_headers[x]] = values[x].strip(' "')
			
 
				     return d
			
 
				 
			
 
				 
			
 
				 def upload(code, img):
			
 
				     """
			
 
				-    code: parse_prod_root_name 返回的产品代码
			
 
				     上传文件到七牛
			
 
				+
			
 
				+    @code: parse_prod_root_name 返回的产品代码
			
 
				+    @img: local image file to be uploaded
			
 
				+
			
 
				+    @return str/False. url of image uploaded or False
			
 
				     """
			
 
				     name = path.basename(img)
			
 
				     a = Auth(access_key=KEY_ACCESS, secret_key=KEY_SECRET)
			
 
				     t = a.upload_token(BUCKET_NAME,)
			
 
				-    key = path.join(code, name)
			
 
				+    key = posixjoin(code, name)
			
 
				     ret, res = put_file(t, key, img)
			
 
				     if res.status_code == 200 and not res.exception:
			
 
				-        return path.join(BUCKET_DOMAIN, key)
			
 
				+        return posixjoin(BUCKET_DOMAIN, key)
			
 
				     return False
			
 
				 
			
 
				 
			
 
				 def gen_details(imgs):
			
 
				     """
			
 
				-    根据图片 url 生成详情页内容,返回
			
 
				+    根据图片 url 生成详情页内容
			
 
				+
			
 
				+    @imgs: url of images uploaded
			
 
				+    
			
 
				+    @return: str, product details.
			
 
				     """
			
 
				     content = ''
			
 
				     for img in imgs:
			
 
				-        content += '<p>\n<img src="' + img + '">\n<p>\n'
			
 
				+        content += '<p>\n<img src="' + img + '" />\n</p>\n'
			
 
				     return content
			
 
				 
			
 
				 
			
 
				 def main(root):
			
 
				+    # find and parse csv file
			
 
				     global DICT_PROD
			
 
				     all = os.listdir(root)
			
 
				     for each in all:
			
@@ -115,7 +152,8 @@ def main(root):
 
				             if ext == '.csv':
			
 
				                 DICT_PROD = parse_csv(full)
			
 
				                 print('INFO: csv loaded.')
			
 
				-
			
 
				+	
			
 
				+    products = []
			
 
				     for each in all:
			
 
				         full = path.join(root, each)
			
 
				         if path.isdir(full):
			
@@ -123,39 +161,66 @@ def main(root):
 
				             if not code:
			
 
				                 print('WARNING: skipped ', full)
			
 
				                 continue
			
 
				+            product = {}
			
 
				             # 处理主图
			
 
				-            urls = _upload_main_images(code, path.join(full, '主图'))
			
 
				+            main_imgs = _upload_images(code, path.join(full, '主图'), 6)
			
 
				+            # 处理属性图
			
 
				+            attr_imgs = _upload_images(code, path.join(full, '属性图'))
			
 
				             # 处理详情图
			
 
				-            contents = _upload_details_images(code, path.join(full, '细节图'))
			
 
				+            details_imgs = _upload_images(code, path.join(full, '细节图'))
			
 
				+            details_text = gen_details(details_imgs)
			
 
				             # 保存文件
			
 
				-            _save_prod(path.join(full, code), name, urls, contents)
			
 
				+            _save_prod(path.join(root, '{}_{}.txt'.format(code, name)), name, main_imgs, details_text)
			
 
				             print('INFO: {} processed OK.'.format(name))
			
 
				+            product['name'] = name
			
 
				+            product['images'] = [main_imgs or [], attr_imgs or [], details_imgs or []]
			
 
				+            products.append(product)
			
 
				+
			
 
				+    # upload to server
			
 
				+    if not _upload_remote_images_to_twong(products):
			
 
				+        print('ERROR: upload to manager failed.')
			
 
				 
			
 
				 
			
 
				 def _save_prod(local, name, urls, details):
			
 
				     """
			
 
				-    save single product info into specified file
			
 
				+    Save single product info into specified file
			
 
				+
			
 
				+    @local: local file to be saved
			
 
				+    @name: product name
			
 
				+    @urls: main pictures url
			
 
				+    @details: details content
			
 
				+
			
 
				+    @return: None
			
 
				     """
			
 
				     with open(local, 'wt', encoding='utf8') as h:
			
 
				         price = DICT_PROD[name] if name in DICT_PROD else 0.0
			
 
				-        h.write('name:{}\n'.format(name))
			
 
				-        h.write('price: {}\nimages:\n'.format(price))
			
 
				+        h.write('产品名:{}\n'.format(name))
			
 
				+        if DICT_PROD.get(name):
			
 
				+            for k, v in DICT_PROD.get(name).items():
			
 
				+                h.write('{}: {}\n'.format(k, v))
			
 
				+        h.write('\n主图:\n')
			
 
				         for url in urls:
			
 
				             h.write(url)
			
 
				             h.write('\n')
			
 
				-        h.write('details:\n')
			
 
				+        h.write('\n详情html:\n')
			
 
				         h.write(details)
			
 
				 
			
 
				 
			
 
				-def _upload_main_images(code, dir):
			
 
				+def _upload_images(code, dir, max=0):
			
 
				     """
			
 
				-    上传主图
			
 
				-    返回 url 列表
			
 
				+    Upload images to qiniu OSS
			
 
				+
			
 
				+    @code: code of product, this code will be upload as the parent directory of all pictures
			
 
				+    @dir: dir of pictures to be uploaded.
			
 
				+    @max: max num picutures to uploaded
			
 
				+    
			
 
				+    @return: list, urls of all pictures uploaded.
			
 
				     """
			
 
				     urls = []
			
 
				     images = os.listdir(dir)
			
 
				     images.sort()
			
 
				-    images = images[:6]
			
 
				+    if max:
			
 
				+        images = images[:max]
			
 
				     for img in images:
			
 
				         full = path.join(dir, img)
			
 
				         url = upload(code, full)
			
@@ -166,28 +231,23 @@ def _upload_main_images(code, dir):
 
				     return urls
			
 
				 
			
 
				 
			
 
				-def _upload_details_images(code, dir):
			
 
				+def _upload_remote_images_to_twong(products):
			
 
				     """
			
 
				-    上传详情图
			
 
				-    返回 详情页 html 内容
			
 
				+    上传至七牛的图片 url 再次上传到后台进行管理
			
 
				+    @products: dict, 可转化为 json 的参数
			
 
				+    
			
 
				+    @return boolean
			
 
				     """
			
 
				-    urls = []
			
 
				-    images = os.listdir(dir)
			
 
				-    images.sort()
			
 
				-    for img in images:
			
 
				-        full = path.join(dir, img)
			
 
				-        url = upload(code, full)
			
 
				-        if not url:
			
 
				-            print('ERROR: upload failed ', full)
			
 
				-            continue
			
 
				-        urls.append(url)
			
 
				-    return gen_details(urls)
			
 
				+    end_point = '{}/admin/PublicAdmin/reg_images?k={}'.format(TWONG, 'twong')
			
 
				+    r = requests.post(url=end_point, json={'products':products})
			
 
				+    print(r.text)
			
 
				+    return r.status_code == 200
			
 
				 
			
 
				 
			
 
				 # testing
			
 
				 if __name__ == '__main__':
			
 
				     if len(sys.argv) < 2:
			
 
				         print('ERROR: root dir of images not given. quit')
			
 
				-        exit(0)
			
 
				+        sys.exit(0)
			
 
				     main(sys.argv[-1])
			
 
				     print('Done.')