# joe / paper
import os
import sys
from os import path
from urllib import request
from qiniu import Auth, put_file, etag

"""
# 功能说明
1) 使用大仙批量下载产品图片后,需要手动修改主图,详情图,主要删除不合适的图片,保留下来的作为最终要呈现的产品资料
而这些经过处理后的图片位于硬盘中某个目录下,这个目录就是本脚本要处理的根目录.
2) 本脚本主要解析处理目录中所有的产品图片,解析,上传,逻辑如下:
a. 提取产品名称
b. 上传主图到七牛
c. 上传详情图到七牛
d. 生成详情页内容html格式文本
e. 提取产品价格
f. 把每个产品的以上信息输出到产品对应的目录
3) 脚本任务就完成了. 之后需要到后台处理.添加图片必须作为远程图片添加. 需要稍微编辑产品其他信息

# 脚本使用
cd /path/to/dir_containing_pdetails
python pdetails.py <root_dir>

"""

_VERSION_ = '0.0.1'
_DATE_ = '2021-01-04'

# Qiniu credentials.
# NOTE(review): these keys are committed in source. Prefer supplying them via
# the QINIU_ACCESS_KEY / QINIU_SECRET_KEY environment variables (and rotating
# the committed ones); the literals below remain only as a backward-compatible
# fallback when the variables are not set.
KEY_ACCESS = os.environ.get(
    'QINIU_ACCESS_KEY', 'SneSBtnWLdStBhCx0O_QogNkXoRlKNOiv1--XMBB')
KEY_SECRET = os.environ.get(
    'QINIU_SECRET_KEY', 'GXMg-ENcp2UKYQWdeaf43tk_06NnMoA4OVFxdkYw')

# Bucket settings; each can be overridden through the environment as well.
BUCKET_NAME = os.environ.get('QINIU_BUCKET_NAME', 'twong')
BUCKET_DOMAIN = os.environ.get(
    'QINIU_BUCKET_DOMAIN', 'http://twongpic.shotshock.shop')

# Product dictionary, parsed from the csv file; used to look up prices.
DICT_PROD = {}


def remote_img_exists(url):
    """
    Check whether a remote file already exists.

    The URL is opened with ``urllib.request.urlopen``; any exception raised
    while opening it (malformed URL, network failure, error response, ...)
    is treated as "does not exist".

    url: the URL to probe.
    Returns True if the URL could be opened, False otherwise.
    """
    try:
        # Only reachability matters, so release the response immediately
        # instead of leaving the connection open.
        with request.urlopen(url):
            return True
    except Exception:
        # Best-effort probe: every ordinary failure means "not there", while
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        return False


def parse_prod_root_name(name):
    """
    Split a product directory name of the form ``<code>_<middle>_<title>``.

    Each product has its own directory under the root dir. The leading
    segment is used as the product's sub-path on Qiniu and the last segment
    as the product name; the middle segment is ignored.

    Returns (code, title) when the name has exactly three '_'-separated
    segments, otherwise (False, False).
    """
    try:
        # Unpacking fails unless there are exactly three segments.
        code, _, title = name.split('_')
    except ValueError:
        return False, False
    return code, title


def parse_csv(csv):
    """
    Parse the product csv file into a name -> price dictionary.

    csv: path of a GBK-encoded text file.

    Each line is split on ','; lines that do not yield exactly 59 fields are
    ignored. The key is the first field with its first and last characters
    removed (presumably the surrounding quotes -- confirm against the export
    format), and the value is the eighth field, stored unchanged.
    """
    prices = {}
    with open(csv, 'rt', encoding='GBK') as handle:
        for row in handle:
            fields = row.split(',')
            if len(fields) == 59:
                prices[fields[0][1:-1]] = fields[7]
    return prices


def upload(code, img):
    """
    Upload a local file to the Qiniu bucket.

    code: product code as returned by parse_prod_root_name; used as the
          key prefix inside the bucket.
    img:  local path of the file to upload; its base name becomes the last
          segment of the key.

    Returns the file's URL under BUCKET_DOMAIN on success, False otherwise.
    """
    name = path.basename(img)
    auth = Auth(access_key=KEY_ACCESS, secret_key=KEY_SECRET)
    token = auth.upload_token(BUCKET_NAME)
    # Object keys and URLs always use '/', so build them explicitly instead
    # of with os.path.join, which would insert '\\' on Windows.
    key = '{}/{}'.format(code, name)
    _, res = put_file(token, key, img)
    if res.status_code == 200 and not res.exception:
        return '{}/{}'.format(BUCKET_DOMAIN.rstrip('/'), key)
    return False


def gen_details(imgs):
    """
    Build the detail-page HTML content from image URLs.

    imgs: iterable of image URL strings.

    Returns one paragraph per URL, each wrapping a single <img> tag, joined
    in input order. An empty input yields ''.
    """
    # The paragraph is now closed with </p>; it was previously "closed" with
    # a second <p>, which produced malformed markup.
    return ''.join(
        '<p>\n<img src="' + img + '">\n</p>\n' for img in imgs
    )


def main(root):
    """
    Process every product directory found directly under ``root``.

    First pass: every ``.csv`` file in ``root`` is parsed into the global
    DICT_PROD (a later csv replaces an earlier one).
    Second pass: for each sub-directory whose name parse_prod_root_name
    accepts, the main images and detail images are uploaded and the
    collected info is saved into a file named after the product code inside
    that directory. Directories with an unrecognised name are skipped.
    """
    global DICT_PROD
    entries = os.listdir(root)

    # Pass 1: load the price table(s).
    for entry in entries:
        candidate = path.join(root, entry)
        if not path.isfile(candidate):
            continue
        if path.splitext(entry)[1] == '.csv':
            DICT_PROD = parse_csv(candidate)
            print('INFO: csv loaded.')

    # Pass 2: handle the product directories.
    for entry in entries:
        prod_dir = path.join(root, entry)
        if not path.isdir(prod_dir):
            continue
        code, name = parse_prod_root_name(entry)
        if not code:
            print('WARNING: skipped ', prod_dir)
            continue
        # Main images
        main_urls = _upload_main_images(code, path.join(prod_dir, '主图'))
        # Detail images
        details_html = _upload_details_images(code, path.join(prod_dir, '细节图'))
        # Write the result file
        _save_prod(path.join(prod_dir, code), name, main_urls, details_html)
        print('INFO: {} processed OK.'.format(name))


def _save_prod(local, name, urls, details):
    """
    Write the info of a single product to the file ``local`` (UTF-8).

    local:   path of the output file (overwritten if it exists).
    name:    product name; also the key used to look up the price in
             DICT_PROD (0.0 is written when the name is not found).
    urls:    main-image URLs, written one per line under 'images:'.
    details: detail-page HTML text, written verbatim under 'details:'.
    """
    price = DICT_PROD.get(name, 0.0)
    with open(local, 'wt', encoding='utf8') as out:
        out.write('name:{}\n'.format(name))
        out.write('price: {}\nimages:\n'.format(price))
        out.writelines(url + '\n' for url in urls)
        out.write('details:\n')
        out.write(details)


def _upload_main_images(code, dir):
    """
    上传主图
    返回 url 列表
    """
    urls = []
    images = os.listdir(dir)
    images.sort()
    images = images[:6]
    for img in images:
        full = path.join(dir, img)
        url = upload(code, full)
        if not url:
            print('ERROR: upload failed ', full)
            continue
        urls.append(url)
    return urls


def _upload_details_images(code, dir):
    """
    Upload the detail images of one product.

    code: product code, passed on to upload() as the key prefix.
    dir:  directory holding the detail images.

    Returns the detail-page HTML that gen_details() builds from the URLs of
    the successfully uploaded files, in name-sorted order. A missing
    directory is reported and treated as having no images instead of
    aborting the whole run; files whose upload fails are reported and left
    out.
    """
    if path.isdir(dir):
        # Only regular files can be uploaded; sub-directories are skipped.
        images = [
            img for img in sorted(os.listdir(dir))
            if path.isfile(path.join(dir, img))
        ]
    else:
        print('ERROR: image dir not found ', dir)
        images = []
    urls = []
    for img in images:
        full = path.join(dir, img)
        url = upload(code, full)
        if not url:
            print('ERROR: upload failed ', full)
            continue
        urls.append(url)
    return gen_details(urls)


# Script entry point: python pdetails.py <root_dir>
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('ERROR: root dir of images not given. quit')
        # A usage error must be visible to the caller, so exit non-zero;
        # sys.exit is used because the bare exit() helper is only installed
        # by the site module.
        sys.exit(1)
    # The last command-line argument is taken as the root dir.
    main(sys.argv[-1])
    print('Done.')