pdetails.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. import os
  2. import sys
  3. from os import path
  4. from urllib import request
  5. from qiniu import Auth, put_file, etag
  6. """
  7. # 功能说明
  8. 1) 使用大仙批量下载产品图片后,需要手动修改主图,详情图,主要删除不合适的图片,保留下来的作为最终要呈现的产品资料
  9. 而这些经过处理后的图片位于硬盘中某个目录下,这个目录就是本脚本要处理的根目录.
  10. 2) 本脚本主要解析处理目录中所有的产品图片,解析,上传,逻辑如下:
  11. a. 提取产品名称
  12. b. 上传主图到七牛
  13. c. 上传详情图到七牛
  14. d. 生成详情页内容html格式文本
  15. e. 提取产品价格
  16. f. 把每个产品的以上信息输出到产品对应的目录
  17. 3) 脚本任务就完成了. 之后需要到后台处理.添加图片必须作为远程图片添加. 需要稍微编辑产品其他信息
  18. # 脚本使用
  19. cd /path/to/pdetails.py
  20. python pdetails.py <root_dir>
  21. """
  22. _VERSION_ = '0.0.1'
  23. _DATE_ = '2021-01-04'
  24. # 七牛配置
  25. KEY_ACCESS = 'SneSBtnWLdStBhCx0O_QogNkXoRlKNOiv1--XMBB'
  26. KEY_SECRET = 'GXMg-ENcp2UKYQWdeaf43tk_06NnMoA4OVFxdkYw'
  27. # bucket 信息
  28. BUCKET_NAME = 'twong'
  29. BUCKET_DOMAIN = 'http://twongpic.shotshock.shop'
  30. # 产品字典, 从 csv 文件中解析, 用于查找价格
  31. DICT_PROD = {}
  32. def remote_img_exists(url):
  33. """
  34. 检查远程文件是否已存在
  35. """
  36. try:
  37. request.urlopen(url)
  38. return True
  39. except:
  40. return False
  41. def parse_prod_root_name(name):
  42. """
  43. ROOT_DIR 下是一个个产品的资料目录, 解析这个目录名, 得到开头的数字串,作为上传到七牛后该产品的子路径
  44. 返回 数字串,产品名 或 False, False
  45. """
  46. parts = name.split('_')
  47. if len(parts) != 3:
  48. return False, False
  49. return parts[0], parts[2]
  50. def parse_csv(csv):
  51. """
  52. 解析 csv 文件
  53. 返回名称-价格字典
  54. """
  55. d = {}
  56. with open(csv, 'rt', encoding='GBK') as h:
  57. while True:
  58. line = h.readline()
  59. if not line:
  60. break
  61. parts = line.split(',')
  62. if len(parts) != 59:
  63. continue
  64. key = parts[0][1:-1]
  65. d[key] = parts[7]
  66. return d
  67. def upload(code, img):
  68. """
  69. code: parse_prod_root_name 返回的产品代码
  70. 上传文件到七牛
  71. """
  72. name = path.basename(img)
  73. a = Auth(access_key=KEY_ACCESS, secret_key=KEY_SECRET)
  74. t = a.upload_token(BUCKET_NAME,)
  75. key = path.join(code, name)
  76. ret, res = put_file(t, key, img)
  77. if res.status_code == 200 and not res.exception:
  78. return path.join(BUCKET_DOMAIN, key)
  79. return False
  80. def gen_details(imgs):
  81. """
  82. 根据图片 url 生成详情页内容,返回
  83. """
  84. content = ''
  85. for img in imgs:
  86. content += '<p>\n<img src="' + img + '">\n<p>\n'
  87. return content
  88. def main(root):
  89. global DICT_PROD
  90. all = os.listdir(root)
  91. for each in all:
  92. full = path.join(root, each)
  93. if path.isfile(full):
  94. _, ext = path.splitext(each)
  95. if ext == '.csv':
  96. DICT_PROD = parse_csv(full)
  97. print('INFO: csv loaded.')
  98. for each in all:
  99. full = path.join(root, each)
  100. if path.isdir(full):
  101. code, name = parse_prod_root_name(each)
  102. if not code:
  103. print('WARNING: skipped ', full)
  104. continue
  105. # 处理主图
  106. urls = _upload_main_images(code, path.join(full, '主图'))
  107. # 处理详情图
  108. contents = _upload_details_images(code, path.join(full, '细节图'))
  109. # 保存文件
  110. _save_prod(path.join(full, code), name, urls, contents)
  111. print('INFO: {} processed OK.'.format(name))
  112. def _save_prod(local, name, urls, details):
  113. """
  114. save single product info into specified file
  115. """
  116. with open(local, 'wt', encoding='utf8') as h:
  117. price = DICT_PROD[name] if name in DICT_PROD else 0.0
  118. h.write('name:{}\n'.format(name))
  119. h.write('price: {}\nimages:\n'.format(price))
  120. for url in urls:
  121. h.write(url)
  122. h.write('\n')
  123. h.write('details:\n')
  124. h.write(details)
  125. def _upload_main_images(code, dir):
  126. """
  127. 上传主图
  128. 返回 url 列表
  129. """
  130. urls = []
  131. images = os.listdir(dir)
  132. images.sort()
  133. images = images[:6]
  134. for img in images:
  135. full = path.join(dir, img)
  136. url = upload(code, full)
  137. if not url:
  138. print('ERROR: upload failed ', full)
  139. continue
  140. urls.append(url)
  141. return urls
  142. def _upload_details_images(code, dir):
  143. """
  144. 上传详情图
  145. 返回 详情页 html 内容
  146. """
  147. urls = []
  148. images = os.listdir(dir)
  149. images.sort()
  150. for img in images:
  151. full = path.join(dir, img)
  152. url = upload(code, full)
  153. if not url:
  154. print('ERROR: upload failed ', full)
  155. continue
  156. urls.append(url)
  157. return gen_details(urls)
  158. # testing
  159. if __name__ == '__main__':
  160. if len(sys.argv) < 2:
  161. print('ERROR: root dir of images not given. quit')
  162. exit(0)
  163. main(sys.argv[-1])
  164. print('Done.')