数据共享

目的

如果您需要更多支持和帮助,欢迎随时联系我们: [email protected]

介绍

数据共享可以有很多种形式:

  1. 原始文件:txt,json,csv,等等
  2. 固定报表:list, crosstab, 等等
  3. 即席查询:api,adhoc query, 等等
API
Amazon的Product Advertising API:

实现代码
import csv
import time
import json

from collections import OrderedDict
from datetime import datetime
from amazon.api import AmazonAPI

AMAZON_ACCESS_KEY = ''
AMAZON_SECRET_KEY = ''
AMAZON_ASSOC_TAG = ''

PRODUCT_ATTRIBUTES = [
    'asin', 'author', 'binding', 'brand', 'browse_nodes', 'ean', 'edition',
    'editorial_review', 'eisbn', 'features', 'get_parent', 'isbn', 'label',
    'large_image_url', 'list_price', 'manufacturer', 'medium_image_url',
    'model', 'mpn', 'offer_url', 'parent_asin', 'part_number',
    'price_and_currency', 'publication_date', 'publisher', 'region',
    'release_date', 'reviews', 'sku', 'small_image_url', 'tiny_image_url',
    'title', 'upc'
]

amazon = AmazonAPI(AMAZON_ACCESS_KEY, AMAZON_SECRET_KEY, AMAZON_ASSOC_TAG)


global_node_options = ['link', 'image', 'text', 'script', 'link_text', 'style']
global_node_keys = ['asin', 'title', 'group', 'price', 'currency', 'offer_url', 'image_url']

def json_list(list_items, list_keys, list_name):
    result = []

    for item in list_items:
        d = OrderedDict()
        for i, list_key in enumerate(list_keys):
            d[list_key] = item[i]

        result.append(d)
    # json_result = json.dumps(result, separators=(',', ':'))
    d_result = OrderedDict()

    d_result[list_name[0]]=list_name[1]
    d_result['data'] = result
    json_result = json.dumps(d_result, sort_keys=False)
    return json_result

def get_result(search_keywords):

    print(search_keywords)
    # json_result: {api_provider: amazon.com, asin:, title:, group:, price:, currency:, offer_url:, image_url:}
    results = list()

    try:
        products = amazon.search(Keywords=search_keywords, SearchIndex='All')
        for product in products:

                results.append([product.asin, product.title, product.get_attribute('ProductGroup'),
                                 product.price_and_currency[0], product.price_and_currency[1],
                                 product.offer_url, product.large_image_url])
    except Exception as e:
        print("error: %s".format(e))


    json_result = json_list(results, global_node_keys, ['api_provider','amazon.com'])
    return json_result

def save_result():
    timestr = time.strftime("%Y%m%d-%H%M%S")

    delimiter = ','

    keywordFile = 'conf/search_keywords.csv'

    outputFile = 'data/amazonItems' + timestr + '.csv'
    logFile = 'logs/log' + timestr + '.csv'
    failedKeywordsFile = 'logs/failedkeywords' + timestr + '.csv'

    logFD = open(logFile, 'w+')
    outputFD = open(outputFile, 'w+')
    failedKeywordsFD = open(failedKeywordsFile, 'w+')

    with open(keywordFile) as f:
        reader = csv.reader(f)
        f.readline()
        count = 0
        with open(outputFile, 'w+') as csvFile:
            writer = csv.writer(csvFile, delimiter=';', quotechar='"', quoting=csv.QUOTE_ALL)
            firstRow = ["product.asin", "product.title", "product.group", "product.price", "currency", "product.offer_url",
                        "product.large_image_url"]
            writer.writerow(firstRow)
            startTime = datetime.now().strftime("%Y%m%d-%H%M%S")
            for row in reader:
                keyword = row[0]
                print(keyword)
                count += 1

                keywordStartTime = datetime.now().strftime("%Y%m%d-%H:%M:%S")
                print("processing keyword {}: {} at {}".format(count, keyword, keywordStartTime))
                logFD.write("processing keyword {}: {}  at {} \n".format(count, keyword, keywordStartTime))
                try:

                    products = amazon.search(Keywords=keyword, SearchIndex='All')
                    productCount = 0
                    for product in products:
                        try:
                            writer.writerow([product.asin, product.title, product.get_attribute('ProductGroup'),
                                             product.price_and_currency[0], product.price_and_currency[1],
                                             product.offer_url, product.large_image_url])

                            productCount += 1
                        except Exception as e:
                            print("error: %s".format(e))
                            logFD.write("error: %s  \n".format(e))
                            continue
                    keywordEndTime = datetime.now().strftime("%Y%m%d-%H:%M:%S")
                    print("processed {} products of keyword: {}  at {} ".format(productCount, keyword, keywordEndTime))
                    logFD.write(
                            "processed {} products of keyword: {}   at {}  \n".format(productCount, keyword,
                                                                                      keywordEndTime))
                except Exception as e:
                    print("error: {}".format(e))
                    logFD.write("error: {}  \n".format(e))
                    failedKeywordsFD.write("{}\n".format(keyword))
                    continue

                time.sleep(2)

    endTime = datetime.now().strftime("%Y%m%d-%H:%M:%S")
    print("all complete at {}".format(endTime))
    logFD.write("all complete at {}".format(endTime))

    csvFile.close()
    logFD.close()
    outputFD.close()
    failedKeywordsFD.close()
realtor.ca的API:
realtor.ca的API
Figure: realtor.ca的API
实现代码
# these two paras control listing result and amount for each post request
        start_page = 1
        records_paper_page = 200
        number_of_days = 1
        api_url = 'https://api2.realtor.ca/Listing.svc/PropertySearch_Post'
        params = {
            "CultureId": "1",
            "ApplicationId": "1",
            "RecordsPerPage": records_paper_page,
            "MaximumResults": "9",
            "PropertySearchTypeId": "1",
            "TransactionTypeId": "2",
            "StoreyRange": "0-0",
            "BedRange": "0-0",
            "BathRange": "0-0",
            "NumberofDays": number_of_days,
            "LongitudeMin": "-126.59401684296874",
            "LongitudeMax": "-116.39870434296874",
            "LatitudeMin": "48.1672551456224",
            "LatitudeMax": "51.755325727529346",
            "SortOrder": "A",
            "SortBy": "1",
            "viewState": "m",
            "Longitude": "-122.64174005377839",
            "Latitude": "49.14514549678261",
            "ZoomLevel": "11",
            "CurrentPage": start_page,
            "PropertyTypeGroupID": "1"
        }
        # Adding header
        headers = {
            'Host': 'api2.realtor.ca',
            'Accept': '*/*',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'}

        response = requests.post(url=api_url, data=params, headers=headers)
        print response.content


        json_response = json.loads(response.content)

        print type(response.content)

        houses = json_response['Results']

        # print type(result)
        print len(houses)

学习

实践

小结


如果您有数据分析咨询实施的需求,欢迎随时和我们联系!

email:[email protected]


微信扫码支持
友情支持 自由赞助

results matching ""

    No results matching ""