"""Upload curated product images to Qiniu and register them with the back end.

# What this script is for
1) After product pictures have been batch-downloaded (with the "Daxian"
   downloader) the main pictures and detail pictures are cleaned up by hand:
   unsuitable pictures are deleted and what remains is the final material for
   the product. Those pictures live under one directory on disk, which is the
   root directory this script works on.
2) The script walks every product directory under that root and:
    a. extracts the product name from the directory name
    b. uploads the main pictures to Qiniu
    c. uploads the attribute and detail pictures to Qiniu
    d. generates the detail-page content as HTML text
    e. looks the product up in the csv export found in the root directory
    f. writes the collected information to ``<code>_<name>.txt`` in the root
    g. posts the remote picture urls, grouped per product, to the back end
3) That completes the script's job; the remaining product information is
   edited in the back end afterwards.

# Usage
    cd /path/to/pdetails.py
    python pdetails.py <root directory of the product folders>
"""
import csv as _csv  # aliased: parse_csv() has a parameter that is named `csv`
import html
import os
import sys
from http.client import HTTPException
from os import path
from posixpath import join as posixjoin
from urllib import request

import requests
from qiniu import Auth, put_file, etag

_VERSION_ = '0.1.2'
_DATE_ = '2021-01-04'
_AUTHOR_ = 'wenstudio@asia.com'

# Qiniu configuration.
# SECURITY(review): these credentials are committed in the source file. They
# can now be overridden through the environment; the literals are kept only as
# defaults so existing setups keep working. Rotate the keys and drop the
# defaults once every user sets the variables.
KEY_ACCESS = os.environ.get(
    'QINIU_ACCESS_KEY', 'SneSBtnWLdStBhCx0O_QogNkXoRlKNOiv1--XMBB')
KEY_SECRET = os.environ.get(
    'QINIU_SECRET_KEY', 'GXMg-ENcp2UKYQWdeaf43tk_06NnMoA4OVFxdkYw')

# Bucket information.
BUCKET_NAME = 'twong'
BUCKET_DOMAIN = 'http://twongpic.shotshock.shop'

TWONG = 'http://twong.shotshock.shop'

# Product dictionary parsed from the csv file; _save_prod() reads it by
# product name.
DICT_PROD = {}

# Columns of the csv export that are copied into DICT_PROD.
_CSV_COLUMNS = (7, 9, 10, 11)

print('pdetails-v{}. 美天旺项目内部工具\n'.format(_VERSION_))


def remote_img_exists(url, timeout=10):
    """
    Check whether a remote file can be opened.
    @url: remote image url
    @timeout: seconds to wait before giving up (new, optional)
    @return: boolean. True when the url could be opened, False otherwise.
    """
    try:
        # The context manager closes the response; only reachability matters.
        with request.urlopen(url, timeout=timeout):
            return True
    except (OSError, ValueError, HTTPException):
        # OSError covers URLError/HTTPError/timeouts, ValueError a malformed
        # url, HTTPException a broken HTTP conversation.
        return False


def parse_prod_root_name(name):
    """
    Split the name of a product directory found under the root directory.
    The leading part is used as the product's sub path on Qiniu.
    @name: directory name, expected to have exactly three '_' separated parts
    @return str, str. first part (code), third part (product name)
            or False, False when the name does not have three parts
    """
    parts = name.split('_')
    if len(parts) != 3:
        return False, False
    return parts[0], parts[2]


def parse_csv(csv):
    """
    Load the columns of interest from the product csv export.
    @csv: csv file (GBK encoded; first line '<word> 1.00', then a line of
          english headers, a line of chinese headers, then one row per product)
    @return: dict. {first column of the row: {chinese header: value}} for the
             columns listed in _CSV_COLUMNS.
    Exits the program when the version line is not supported.
    """
    try:
        # Long cells must not trip the csv module's default field size limit.
        _csv.field_size_limit(2 ** 31 - 1)
    except OverflowError:
        pass

    d = {}
    # newline='' lets the csv module handle line breaks inside quoted cells.
    with open(csv, 'rt', encoding='GBK', newline='') as h:
        # version line
        version_parts = h.readline().split(' ')
        if (len(version_parts) != 2
                or version_parts[1].lower().strip() != '1.00'):
            print('ERROR: unsupported csv file version. \n'
                  'please update this software.')
            sys.exit(1)

        # csv.reader copes with quoted cells that contain commas, which a
        # plain split(',') does not.
        rows = _csv.reader(h)
        next(rows, None)  # english headers, not used
        cn_headers = [cell.strip() for cell in next(rows, [])]
        columns = [x for x in _CSV_COLUMNS if x < len(cn_headers)]

        # products
        for values in rows:
            if not values:
                continue  # blank line
            key = values[0].strip()
            if not key:
                continue
            # Short rows simply contribute the columns they have.
            d[key] = {
                cn_headers[x]: values[x].strip()
                for x in columns
                if x < len(values)
            }
    return d


def upload(code, img):
    """
    Upload one file to Qiniu.
    @code: product code as returned by parse_prod_root_name
    @img: local image file to be uploaded
    @return str/False. url of the uploaded image, or False when the upload
            did not report status 200 without exception
    """
    name = path.basename(img)
    auth = Auth(access_key=KEY_ACCESS, secret_key=KEY_SECRET)
    token = auth.upload_token(BUCKET_NAME)
    # The object key is '<code>/<file name>'; the public url mirrors it.
    key = posixjoin(code, name)
    _, res = put_file(token, key, img)
    if res.status_code == 200 and not res.exception:
        return posixjoin(BUCKET_DOMAIN, key)
    return False


def gen_details(imgs):
    """
    Build the detail page content from picture urls.
    @imgs: urls of the uploaded images
    @return: str, product details as HTML text, one fragment per url.
    """
    # NOTE(review): the HTML markup of this template was lost in the copy of
    # the file under review (only the '\n\n' and '\n' separators survived).
    # The tags below are a reconstruction -- confirm them against the original.
    template = '<p><img src="{}"></p>\n\n<p><br></p>\n'
    return ''.join(
        template.format(html.escape(img, quote=True)) for img in imgs or ()
    )


def main(root):
    """
    Process every product directory under root.
    @root: directory that holds the csv export and one folder per product
    @return: None
    """
    # find and parse csv file
    global DICT_PROD
    entries = sorted(os.listdir(root))
    for each in entries:
        full = path.join(root, each)
        if path.isfile(full) and path.splitext(each)[1].lower() == '.csv':
            DICT_PROD = parse_csv(full)
            print('INFO: csv loaded.')

    products = []
    for each in entries:
        full = path.join(root, each)
        if not path.isdir(full):
            continue
        code, name = parse_prod_root_name(each)
        if not code:
            print('WARNING: skipped ', full)
            continue

        # main pictures (at most 6)
        main_imgs = _upload_images(code, path.join(full, '主图'), 6)
        # attribute pictures
        attr_imgs = _upload_images(code, path.join(full, '属性图'))
        # detail pictures
        details_imgs = _upload_images(code, path.join(full, '细节图'))
        details_text = gen_details(details_imgs)

        # save the summary file next to the product folders
        _save_prod(path.join(root, '{}_{}.txt'.format(code, name)),
                   name, main_imgs, details_text)
        print('INFO: {} processed OK.'.format(name))

        products.append({
            'name': name,
            'images': [main_imgs or [], attr_imgs or [], details_imgs or []],
        })

    # upload to server
    if not _upload_remote_images_to_twong(products):
        print('ERROR: upload to manager failed.')


def _save_prod(local, name, urls, details):
    """
    Save single product info into specified file
    @local: local file to be saved
    @name: product name
    @urls: main pictures url
    @details: details content
    @return: None
    """
    info = DICT_PROD.get(name)
    with open(local, 'wt', encoding='utf8') as h:
        h.write('产品名:{}\n'.format(name))
        if info:
            for k, v in info.items():
                h.write('{}: {}\n'.format(k, v))
        h.write('\n主图:\n')
        for url in urls or ():
            h.write(url)
            h.write('\n')
        h.write('\n详情html:\n')
        h.write(details)


def _upload_images(code, dir, max=0):
    """
    Upload images to qiniu OSS
    @code: code of product, used as the parent directory of all pictures
    @dir: dir of pictures to be uploaded.
    @max: max number of pictures to upload, 0 means no limit
    @return: list, urls of all pictures uploaded. Empty when dir is missing.
    """
    # `dir` and `max` shadow builtins but are kept for caller compatibility.
    if not path.isdir(dir):
        # Not every product has every picture folder; treat it as "no images".
        print('WARNING: image dir not found ', dir)
        return []

    # Only regular files can be uploaded; skip nested directories.
    images = sorted(
        entry for entry in os.listdir(dir)
        if path.isfile(path.join(dir, entry))
    )
    if max:
        images = images[:max]

    urls = []
    for img in images:
        full = path.join(dir, img)
        url = upload(code, full)
        if not url:
            print('ERROR: upload failed ', full)
            continue
        urls.append(url)
    return urls


def _upload_remote_images_to_twong(products):
    """
    Post the urls of the pictures uploaded to Qiniu to the back end.
    @products: list of product dicts, must be convertible to json
    @return boolean. True when the server answered with status 200.
    """
    end_point = '{}/admin/PublicAdmin/reg_images?k={}'.format(TWONG, 'twong')
    try:
        r = requests.post(url=end_point, json={'products': products},
                          timeout=60)
    except requests.RequestException as err:
        # Report the failure through the return value instead of a traceback.
        print('ERROR: request to manager failed: {}'.format(err))
        return False
    print(r.text)
    return r.status_code == 200


# testing
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('ERROR: root dir of images not given. quit')
        sys.exit(1)

    main(sys.argv[-1])
    print('Done.')