pdetails.py 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. import os
  2. import sys
  3. import requests
  4. from os import path
  5. from posixpath import join as posixjoin
  6. from urllib import request
  7. from qiniu import Auth, put_file, etag
  8. """
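# Overview
1) After product images have been batch-downloaded with the "大仙" (Daxian) tool, the main images and
detail images must be curated by hand - mostly by deleting unsuitable pictures. What remains is the
final product material to be published. The curated images live in a directory on disk; that
directory is the root directory this script processes.
2) The script parses and uploads all product images found under the root directory. Steps:
a. extract the product name
b. upload the main images to Qiniu
c. upload the detail images to Qiniu
d. generate the detail-page content as HTML text
e. extract the product price
f. write the above information for each product to the product's output file
g. post the remote image URLs together with the product information to the backend server for management
3) That completes the script's job. The remaining product information is edited afterwards in the backend.
# Usage
cd /path/to/directory/containing/pdetails.py
python pdetails.py <root_dir>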
  24. """
  25. _VERSION_ = '0.1.2'
  26. _DATE_ = '2021-01-04'
  27. _AUTHOR_ = 'wenstudio@asia.com'
  28. # 七牛配置
  29. KEY_ACCESS = 'SneSBtnWLdStBhCx0O_QogNkXoRlKNOiv1--XMBB'
  30. KEY_SECRET = 'GXMg-ENcp2UKYQWdeaf43tk_06NnMoA4OVFxdkYw'
  31. # bucket 信息
  32. BUCKET_NAME = 'twong'
  33. BUCKET_DOMAIN = 'http://twongpic.shotshock.shop'
  34. TWONG = 'http://twong.shotshock.shop'
  35. # 产品字典, 从 csv 文件中解析, 用于查找价格
  36. DICT_PROD = {}
  37. print('pdetails-v{}. 美天旺项目内部工具\n'.format(_VERSION_))
  38. def remote_img_exists(url):
  39. """
  40. 检查远程文件是否已存在
  41. @url: remote image url
  42. @return: boolean.
  43. """
  44. try:
  45. request.urlopen(url)
  46. return True
  47. except:
  48. return False
  49. def parse_prod_root_name(name):
  50. """
  51. ROOT_DIR 下是一个个产品的资料目录, 解析这个目录名, 得到开头的数字串,作为上传到七牛后该产品的子路径
  52. @name: path name
  53. @return str, str.
  54. 数字串,产品名 或 False, False
  55. """
  56. parts = name.split('_')
  57. if len(parts) != 3:
  58. return False, False
  59. return parts[0], parts[2]
  60. def parse_csv(csv):
  61. """
  62. @csv: csv file
  63. @return: dict
  64. the most important columns in the csv file.
  65. """
  66. d = {}
  67. with open(csv, 'rt', encoding='GBK') as h:
  68. # version line
  69. version_line = h.readline()
  70. parts = version_line.split(' ')
  71. if len(parts) != 2 or parts[1].lower().strip() != '1.00':
  72. print('ERROR: unsupported csv file version. please update this software.')
  73. exit(0)
  74. # english headers
  75. eng_headers = h.readline().split(',')
  76. cn_headers = h.readline().split(',')
  77. # products
  78. while True:
  79. values = h.readline()
  80. if not values:
  81. break
  82. values = values.split(',')
  83. key = values[0].strip(' "')
  84. d[key] = {}
  85. # extract some columes
  86. for x in [7,9,10,11]:
  87. d[key][cn_headers[x]] = values[x].strip(' "')
  88. return d
  89. def upload(code, img):
  90. """
  91. 上传文件到七牛
  92. @code: parse_prod_root_name 返回的产品代码
  93. @img: local image file to be uploaded
  94. @return str/False. url of image uploaded or False
  95. """
  96. name = path.basename(img)
  97. a = Auth(access_key=KEY_ACCESS, secret_key=KEY_SECRET)
  98. t = a.upload_token(BUCKET_NAME,)
  99. key = posixjoin(code, name)
  100. ret, res = put_file(t, key, img)
  101. if res.status_code == 200 and not res.exception:
  102. return posixjoin(BUCKET_DOMAIN, key)
  103. return False
  104. def gen_details(imgs):
  105. """
  106. 根据图片 url 生成详情页内容
  107. @imgs: url of images uploaded
  108. @return: str, product details.
  109. """
  110. content = ''
  111. for img in imgs:
  112. content += '<p>\n<img src="' + img + '" />\n</p>\n'
  113. return content
  114. def main(root):
  115. # find and parse csv file
  116. global DICT_PROD
  117. all = os.listdir(root)
  118. for each in all:
  119. full = path.join(root, each)
  120. if path.isfile(full):
  121. _, ext = path.splitext(each)
  122. if ext == '.csv':
  123. DICT_PROD = parse_csv(full)
  124. print('INFO: csv loaded.')
  125. products = []
  126. for each in all:
  127. full = path.join(root, each)
  128. if path.isdir(full):
  129. code, name = parse_prod_root_name(each)
  130. if not code:
  131. print('WARNING: skipped ', full)
  132. continue
  133. product = {}
  134. # 处理主图
  135. main_imgs = _upload_images(code, path.join(full, '主图'), 6)
  136. # 处理属性图
  137. attr_imgs = _upload_images(code, path.join(full, '属性图'))
  138. # 处理详情图
  139. details_imgs = _upload_images(code, path.join(full, '细节图'))
  140. details_text = gen_details(details_imgs)
  141. # 保存文件
  142. _save_prod(path.join(root, '{}_{}.txt'.format(code, name)), name, main_imgs, details_text)
  143. print('INFO: {} processed OK.'.format(name))
  144. product['name'] = name
  145. product['images'] = [main_imgs or [], attr_imgs or [], details_imgs or []]
  146. products.append(product)
  147. # upload to server
  148. if not _upload_remote_images_to_twong(products):
  149. print('ERROR: upload to manager failed.')
  150. def _save_prod(local, name, urls, details):
  151. """
  152. Save single product info into specified file
  153. @local: local file to be saved
  154. @name: product name
  155. @urls: main pictures url
  156. @details: details content
  157. @return: None
  158. """
  159. with open(local, 'wt', encoding='utf8') as h:
  160. price = DICT_PROD[name] if name in DICT_PROD else 0.0
  161. h.write('产品名:{}\n'.format(name))
  162. if DICT_PROD.get(name):
  163. for k, v in DICT_PROD.get(name).items():
  164. h.write('{}: {}\n'.format(k, v))
  165. h.write('\n主图:\n')
  166. for url in urls:
  167. h.write(url)
  168. h.write('\n')
  169. h.write('\n详情html:\n')
  170. h.write(details)
  171. def _upload_images(code, dir, max=0):
  172. """
  173. Upload images to qiniu OSS
  174. @code: code of product, this code will be upload as the parent directory of all pictures
  175. @dir: dir of pictures to be uploaded.
  176. @max: max num picutures to uploaded
  177. @return: list, urls of all pictures uploaded.
  178. """
  179. urls = []
  180. images = os.listdir(dir)
  181. images.sort()
  182. if max:
  183. images = images[:max]
  184. for img in images:
  185. full = path.join(dir, img)
  186. url = upload(code, full)
  187. if not url:
  188. print('ERROR: upload failed ', full)
  189. continue
  190. urls.append(url)
  191. return urls
  192. def _upload_remote_images_to_twong(products):
  193. """
  194. 上传至七牛的图片 url 再次上传到后台进行管理
  195. @products: dict, 可转化为 json 的参数
  196. @return boolean
  197. """
  198. end_point = '{}/admin/PublicAdmin/reg_images?k={}'.format(TWONG, 'twong')
  199. r = requests.post(url=end_point, json={'products':products})
  200. print(r.text)
  201. return r.status_code == 200
  202. # testing
  203. if __name__ == '__main__':
  204. if len(sys.argv) < 2:
  205. print('ERROR: root dir of images not given. quit')
  206. sys.exit(0)
  207. main(sys.argv[-1])
  208. print('Done.')