| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193 |
- import os
- import sys
- from os import path
- from urllib import request
- from qiniu import Auth, put_file, etag
- """
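# Overview
1) After product images have been batch-downloaded with the "大仙" (Daxian) tool, the main
   images and detail images are curated by hand, mostly by deleting unsuitable pictures;
   whatever remains is the final material to present for the product. The curated images
   live in a directory on disk, and that directory is the root directory this script works on.
2) The script walks all product images under the root directory, parses them and uploads them:
   a. extract the product name
   b. upload the main images to Qiniu
   c. upload the detail images to Qiniu
   d. generate the detail-page content as HTML text
   e. look up the product price
   f. write the information above into each product's own directory
3) That completes the script's job. The remaining work is done in the admin backend: images
   must be added as remote images, and the other product fields need light editing.
# Usage
cd /path/to/directory/containing/pdetails.py
python pdetails.py <root_dir>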
- """
_VERSION_ = '0.0.1'
_DATE_ = '2021-01-04'

# Qiniu credentials.
# SECURITY: secrets should not live in source control. They can be supplied
# through the QINIU_ACCESS_KEY / QINIU_SECRET_KEY environment variables; the
# embedded values are kept only as a fallback so existing setups keep working
# and should be rotated and removed.
KEY_ACCESS = os.environ.get(
    'QINIU_ACCESS_KEY', 'SneSBtnWLdStBhCx0O_QogNkXoRlKNOiv1--XMBB')
KEY_SECRET = os.environ.get(
    'QINIU_SECRET_KEY', 'GXMg-ENcp2UKYQWdeaf43tk_06NnMoA4OVFxdkYw')

# Bucket information.
BUCKET_NAME = 'twong'
BUCKET_DOMAIN = 'http://twongpic.shotshock.shop'

# Product dictionary parsed from the csv file; used to look up prices.
DICT_PROD = {}
def remote_img_exists(url):
    """Check whether a remote file can be opened.

    Args:
        url: URL to probe.

    Returns:
        True if ``urlopen`` opens the URL without raising, False otherwise.
    """
    # Imported locally so the function does not depend on a new file-level import.
    from http.client import HTTPException

    try:
        # Only reachability matters, so release the connection immediately
        # instead of leaving the response object open.
        with request.urlopen(url):
            return True
    except (OSError, ValueError, HTTPException):
        # OSError covers URLError/HTTPError and socket errors, ValueError
        # covers malformed URLs, HTTPException covers protocol-level failures.
        # Unlike a bare ``except``, KeyboardInterrupt/SystemExit still propagate.
        return False
def parse_prod_root_name(name):
    """Split a product directory name into its code and product name.

    Each product directory under the root is expected to be named as three
    underscore-separated fields. The first field (by convention a digit
    string) becomes the product's sub-path on Qiniu and the last field is the
    product name; the middle field is ignored.

    Args:
        name: Directory name (not a full path).

    Returns:
        ``(code, product_name)`` when the name has exactly three fields,
        otherwise ``(False, False)``.
    """
    try:
        # Unpacking fails unless the split yields exactly three fields.
        code, _, product = name.split('_')
    except ValueError:
        return False, False
    return code, product
def parse_csv(csv):
    """Build a name -> price mapping from a GBK-encoded csv file.

    Rows are split on plain commas and only rows with exactly 59 fields are
    used. The key is the first field with its first and last characters
    dropped (presumably the surrounding quotes); the value is the eighth
    field, taken verbatim.

    Args:
        csv: Path of the csv file.

    Returns:
        A dict mapping product name to the raw price field.
    """
    prices = {}
    with open(csv, 'rt', encoding='GBK') as handle:
        for row in handle:
            fields = row.split(',')
            if len(fields) == 59:
                prices[fields[0][1:-1]] = fields[7]
    return prices
def upload(code, img):
    """Upload a local file to the Qiniu bucket.

    Args:
        code: Product code as returned by ``parse_prod_root_name``; used as
            the folder prefix of the object key.
        img: Path of the local file to upload.

    Returns:
        The URL built from ``BUCKET_DOMAIN`` and the object key when the
        upload response has status 200 and no exception, otherwise ``False``.
    """
    name = path.basename(img)
    # Object keys and URLs always use '/'. os.path.join would insert '\\' on
    # Windows and yield a broken key and URL, so join by hand.
    key = '{}/{}'.format(code, name)
    auth = Auth(access_key=KEY_ACCESS, secret_key=KEY_SECRET)
    token = auth.upload_token(BUCKET_NAME)
    _, info = put_file(token, key, img)
    if info.status_code == 200 and not info.exception:
        return '{}/{}'.format(BUCKET_DOMAIN.rstrip('/'), key)
    return False
def gen_details(imgs):
    """Build the HTML content of a product's detail page.

    Args:
        imgs: Iterable of image URLs, in display order.

    Returns:
        One ``<p>`` paragraph wrapping an ``<img>`` per URL, concatenated
        into a single string; an empty string when there are no URLs.
    """
    # The closing tag used to be a second '<p>', which left every paragraph
    # unclosed and opened an empty one after each image.
    return ''.join(
        '<p>\n<img src="' + img + '">\n</p>\n' for img in imgs
    )
def main(root):
    """Process every product directory found directly under ``root``.

    First, each ``.csv`` file directly under ``root`` is parsed into the
    module-level ``DICT_PROD`` (a later file replaces an earlier one). Then,
    for each sub-directory whose name parses into a product code and name,
    the main and detail images are uploaded and the product info is written
    to a file named after the code inside that directory.

    Args:
        root: Directory holding the csv file(s) and the product directories.
    """
    global DICT_PROD
    entries = os.listdir(root)

    # Pass 1: load the price table before any product is processed.
    for entry in entries:
        entry_path = path.join(root, entry)
        if not path.isfile(entry_path):
            continue
        if path.splitext(entry)[1] != '.csv':
            continue
        DICT_PROD = parse_csv(entry_path)
        print('INFO: csv loaded.')

    # Pass 2: handle the product directories.
    for entry in entries:
        prod_dir = path.join(root, entry)
        if not path.isdir(prod_dir):
            continue
        code, name = parse_prod_root_name(entry)
        if not code:
            print('WARNING: skipped ', prod_dir)
            continue
        # Main images.
        urls = _upload_main_images(code, path.join(prod_dir, '主图'))
        # Detail images, already rendered to HTML.
        contents = _upload_details_images(code, path.join(prod_dir, '细节图'))
        # Persist the collected info next to the images.
        _save_prod(path.join(prod_dir, code), name, urls, contents)
        print('INFO: {} processed OK.'.format(name))
def _save_prod(local, name, urls, details):
    """Write one product's info to the file ``local`` (UTF-8, overwritten).

    The file holds the product name, its price looked up in ``DICT_PROD``
    (``0.0`` when the name is not present), one image URL per line, and the
    detail-page HTML.

    Args:
        local: Path of the output file.
        name: Product name; also the lookup key for the price.
        urls: Main-image URLs.
        details: Detail-page HTML text.
    """
    with open(local, 'wt', encoding='utf8') as out:
        price = DICT_PROD.get(name, 0.0)
        out.write('name:{}\n'.format(name))
        out.write('price: {}\nimages:\n'.format(price))
        out.writelines(url + '\n' for url in urls)
        out.write('details:\n')
        out.write(details)
- def _upload_main_images(code, dir):
- """
- 上传主图
- 返回 url 列表
- """
- urls = []
- images = os.listdir(dir)
- images.sort()
- images = images[:6]
- for img in images:
- full = path.join(dir, img)
- url = upload(code, full)
- if not url:
- print('ERROR: upload failed ', full)
- continue
- urls.append(url)
- return urls
- def _upload_details_images(code, dir):
- """
- 上传详情图
- 返回 详情页 html 内容
- """
- urls = []
- images = os.listdir(dir)
- images.sort()
- for img in images:
- full = path.join(dir, img)
- url = upload(code, full)
- if not url:
- print('ERROR: upload failed ', full)
- continue
- urls.append(url)
- return gen_details(urls)
# Script entry point: python pdetails.py <root_dir>
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('ERROR: root dir of images not given. quit')
        # A missing argument is a failure, so report a non-zero status. Use
        # sys.exit because the bare exit() helper is only installed by the
        # site module and is not guaranteed to exist.
        sys.exit(1)
    main(sys.argv[-1])
    print('Done.')
|