| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253 |
- import os
- import sys
- import requests
- from os import path
- from posixpath import join as posixjoin
- from urllib import request
- from qiniu import Auth, put_file, etag
- """
- # 功能说明
- 1) 使用大仙批量下载产品图片后,需要手动修改主图,详情图,主要删除不合适的图片,保留下来的作为最终要呈现的产品资料
- 这些经过处理后的图片位于硬盘中某个目录下,这个目录就是本脚本要处理的根目录.
- 2) 本脚本主要解析处理目录中所有的产品图片,解析,上传,逻辑如下:
- a. 提取产品名称
- b. 上传主图到七牛
- c. 上传详情图到七牛
- d. 生成详情页内容html格式文本
- e. 提取产品价格
- f. 把每个产品的以上信息输出到产品对应的目录
- g. 把图片的远程地址结合产品信息上传到后台服务器进行管理
- 3) 脚本任务就完成了. 之后需要到后台处理. 需要编辑产品其他信息
- # 脚本使用
- cd /path/to/scripts    (the directory that contains pdetails.py)
- python pdetails.py <root_dir>
- """
_VERSION_ = '0.1.2'
_DATE_ = '2021-01-04'
_AUTHOR_ = 'wenstudio@asia.com'

# Qiniu credentials.
# NOTE(review): the literal fallbacks below are credentials committed to the
# source file. They are kept only so existing setups keep working; prefer the
# QINIU_ACCESS_KEY / QINIU_SECRET_KEY environment variables, then rotate the
# keys and drop the literals.
KEY_ACCESS = (os.environ.get('QINIU_ACCESS_KEY')
              or 'SneSBtnWLdStBhCx0O_QogNkXoRlKNOiv1--XMBB')
KEY_SECRET = (os.environ.get('QINIU_SECRET_KEY')
              or 'GXMg-ENcp2UKYQWdeaf43tk_06NnMoA4OVFxdkYw')

# Qiniu bucket name and the domain used to build public image URLs.
BUCKET_NAME = 'twong'
BUCKET_DOMAIN = 'http://twongpic.shotshock.shop'
# Base URL of the back-office server that receives the uploaded image URLs.
TWONG = 'http://twong.shotshock.shop'

# Product dictionary parsed from the csv file; used to look up product info.
DICT_PROD = {}

# Banner printed when the module is loaded.
print('pdetails-v{}. 美天旺项目内部工具\n'.format(_VERSION_))
def remote_img_exists(url, timeout=10):
    """
    Check whether a remote file can be opened.

    @url: remote image url
    @timeout: seconds to wait for the server before giving up
    @return: boolean. True when the url opens successfully, False when it is
             empty, malformed, unreachable or answers with an HTTP error.
    """
    # Local import: only the exception type is needed.
    from http.client import HTTPException

    if not url:
        return False
    try:
        # The context manager closes the response so the connection is not
        # leaked when many urls are probed.
        with request.urlopen(url, timeout=timeout):
            return True
    except (OSError, ValueError, HTTPException):
        # URLError, HTTPError and socket timeouts are OSError subclasses;
        # ValueError is raised for malformed urls. KeyboardInterrupt and
        # SystemExit are deliberately NOT swallowed.
        return False
def parse_prod_root_name(name):
    """
    Split a product directory name of the form ``<code>_<middle>_<name>``.

    Each directory under the root holds one product; the leading part of its
    name is used as the product's sub-path on Qiniu.

    @name: directory name (not a full path)
    @return str, str.
        (code, product name) when the name has exactly three
        '_'-separated parts, otherwise (False, False)
    """
    try:
        # Unpacking fails unless the split yields exactly three pieces.
        code, _middle, title = name.split('_')
    except ValueError:
        return False, False
    return code, title
def parse_csv(csv):
    """
    Parse the product csv file (GBK encoded) and keep the columns this tool
    needs.

    Expected layout: line 1 is a version line ``<word> 1.00``, line 2 holds
    the English headers (ignored), line 3 the Chinese headers, and every
    following record describes one product.

    @csv: csv file
    @return: dict
        maps the first column of each record to a dict
        {chinese header: value} for columns 7, 9, 10 and 11.
        Blank or truncated records are skipped.

    Exits the process with status 1 when the version line or the header row
    is not what this tool understands.
    """
    # The parameter is named ``csv`` (part of the public signature), so the
    # standard module is imported under another name.
    import csv as csv_module

    wanted = (7, 9, 10, 11)
    needed = wanted[-1] + 1
    # Allow very large fields; the default limit is 128 KiB per field.
    csv_module.field_size_limit(2 ** 31 - 1)

    d = {}
    # newline='' lets the csv reader handle line breaks inside quoted fields.
    with open(csv, 'rt', encoding='GBK', newline='') as h:
        # version line
        parts = h.readline().split(' ')
        if len(parts) != 2 or parts[1].lower().strip() != '1.00':
            print('ERROR: unsupported csv file version. please update this software.')
            sys.exit(1)

        # A real csv parser is required: fields may contain commas, quotes
        # and newlines, which a plain split(',') tears apart.
        reader = csv_module.reader(h)
        next(reader, None)  # english headers, not used
        cn_headers = [col.strip(' "') for col in next(reader, [])]
        if len(cn_headers) < needed:
            print('ERROR: unexpected csv header layout. please update this software.')
            sys.exit(1)

        # products
        for values in reader:
            if len(values) < needed:
                continue  # blank line or truncated record
            key = values[0].strip(' "')
            # extract the columns of interest
            d[key] = {cn_headers[x]: values[x].strip(' "') for x in wanted}
    return d
def upload(code, img):
    """
    Upload one local file to the Qiniu bucket.

    The object key is ``<code>/<file name of img>``.

    @code: product code returned by parse_prod_root_name
    @img: local image file to be uploaded
    @return str/False. url of the uploaded image (bucket domain joined with
            the object key), or False when the response status is not 200
            or the response carries an exception
    """
    file_name = path.basename(img)
    auth = Auth(access_key=KEY_ACCESS, secret_key=KEY_SECRET)
    token = auth.upload_token(BUCKET_NAME)
    object_key = posixjoin(code, file_name)
    _ret, info = put_file(token, object_key, img)
    if info.status_code != 200 or info.exception:
        return False
    return posixjoin(BUCKET_DOMAIN, object_key)
def gen_details(imgs):
    """
    Build the details-page html from image urls.

    Every url becomes one ``<p>`` paragraph wrapping an ``<img>`` tag, in
    the order given.

    @imgs: iterable of urls of uploaded images; None or empty yields ''

    @return: str, product details.
    """
    # str.join builds the result in one pass instead of repeated +=.
    return ''.join(
        '<p>\n<img src="' + img + '" />\n</p>\n' for img in (imgs or ())
    )
def main(root):
    """
    Process every product directory found directly under ``root``.

    Steps:
      1. load product info from each ``.csv`` file in ``root`` into
         DICT_PROD (a later file replaces an earlier one);
      2. for each sub-directory whose name parses as a product, upload its
         main / attribute / detail images and write a ``<code>_<name>.txt``
         summary into ``root``;
      3. send the collected image urls to the back-office server.

    @root: root directory that holds the csv file and product directories
    @return: None
    """
    global DICT_PROD
    entries = os.listdir(root)

    # find and parse csv file
    for entry in entries:
        entry_path = path.join(root, entry)
        if not path.isfile(entry_path):
            continue
        if path.splitext(entry)[1] != '.csv':
            continue
        DICT_PROD = parse_csv(entry_path)
        print('INFO: csv loaded.')

    products = []
    for entry in entries:
        prod_dir = path.join(root, entry)
        if not path.isdir(prod_dir):
            continue
        code, name = parse_prod_root_name(entry)
        if not code:
            print('WARNING: skipped ', prod_dir)
            continue

        # main images (at most 6)
        main_imgs = _upload_images(code, path.join(prod_dir, '主图'), 6)
        # attribute images
        attr_imgs = _upload_images(code, path.join(prod_dir, '属性图'))
        # detail images and the html built from them
        details_imgs = _upload_images(code, path.join(prod_dir, '细节图'))
        details_text = gen_details(details_imgs)

        # save the per-product summary file
        summary_file = path.join(root, '{}_{}.txt'.format(code, name))
        _save_prod(summary_file, name, main_imgs, details_text)
        print('INFO: {} processed OK.'.format(name))

        products.append({
            'name': name,
            'images': [main_imgs or [], attr_imgs or [], details_imgs or []],
        })

    # upload to server
    if not _upload_remote_images_to_twong(products):
        print('ERROR: upload to manager failed.')
def _save_prod(local, name, urls, details):
    """
    Save single product info into specified file (UTF-8 text).

    The file holds the product name, every field known for that name in
    DICT_PROD (if any), the main picture urls one per line, and the details
    html.

    @local: local file to be saved
    @name: product name
    @urls: main pictures url
    @details: details content
    @return: None
    """
    with open(local, 'wt', encoding='utf8') as h:
        # Single lookup; an unknown product simply has no extra fields.
        info = DICT_PROD.get(name)
        h.write('产品名:{}\n'.format(name))
        if info:
            for k, v in info.items():
                h.write('{}: {}\n'.format(k, v))
        h.write('\n主图:\n')
        for url in urls:
            h.write(url)
            h.write('\n')
        h.write('\n详情html:\n')
        h.write(details)
def _upload_images(code, dir, max=0):
    """
    Upload images to qiniu OSS

    Files are uploaded in sorted name order. A missing directory yields an
    empty list (with a warning) instead of aborting the whole run, and
    sub-directories inside ``dir`` are ignored.

    @code: code of product, used as the parent directory of all pictures
    @dir: dir of pictures to be uploaded.
    @max: max number of pictures to upload; 0 means no limit

    @return: list, urls of all pictures uploaded.
    """
    # NOTE: ``dir`` and ``max`` shadow builtins but are part of the public
    # signature, so the names are kept.
    if not path.isdir(dir):
        print('WARNING: image dir not found ', dir)
        return []

    # Only regular files can be uploaded; filter before applying the limit
    # so a stray sub-directory does not use up one of the ``max`` slots.
    images = sorted(
        img for img in os.listdir(dir) if path.isfile(path.join(dir, img))
    )
    if max:
        images = images[:max]

    urls = []
    for img in images:
        full = path.join(dir, img)
        url = upload(code, full)
        if not url:
            print('ERROR: upload failed ', full)
            continue
        urls.append(url)
    return urls
def _upload_remote_images_to_twong(products, timeout=60):
    """
    Register the image urls already uploaded to Qiniu with the back-office
    server.

    @products: list of product dicts (must be JSON serialisable); sent as
               ``{"products": products}``
    @timeout: seconds to wait for the server before giving up

    @return boolean. True when the server answers with HTTP 200; False on
            any other status or when the request itself fails.
    """
    end_point = '{}/admin/PublicAdmin/reg_images?k={}'.format(TWONG, 'twong')
    try:
        r = requests.post(
            url=end_point, json={'products': products}, timeout=timeout)
    except requests.RequestException as err:
        # Connection problems and timeouts are reported as a failed upload
        # rather than crashing after all images have been processed.
        print('ERROR: request to manager failed: {}'.format(err))
        return False
    print(r.text)
    return r.status_code == 200
# Script entry point: python pdetails.py <root_dir>
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('ERROR: root dir of images not given. quit')
        # A usage error must not report success to the calling shell.
        sys.exit(1)
    # The last command-line argument is taken as the root directory.
    main(sys.argv[-1])
    print('Done.')
|