代码之家  ›  专栏  ›  技术社区  ›  polonius11

使用 pandas-datareader 进行 Python 回归建模

  •  1
  • polonius11  · 技术社区  · 6 年前

    我正在尝试构建一个函数，它可以提取任意一只股票的数据，然后绘制回归图。但是，我在处理源数据时遇到了问题。我的问题是：如何绘制 pandas DataFrame 中的时间序列，并画出随时间变化的线性趋势？我的代码如下：

    此代码将生成回归:

    import matplotlib.pyplot as plt
    import seaborn as sns; sns.set()
    import numpy as np
    # Synthetic sample: 50 points scattered around the line y = 2x - 5.
    # The uniform draw must happen before the normal draw so the seeded
    # generator yields the same values.
    rng = np.random.RandomState(1)
    x = rng.rand(50) * 10
    noise = rng.randn(50)
    y = 2 * x - 5 + noise

    # First figure: the raw points only.
    plt.scatter(x, y)
    plt.show()

    from sklearn.linear_model import LinearRegression

    # scikit-learn expects a 2-D feature matrix, hence the single-column reshape.
    model = LinearRegression(fit_intercept=True)
    model.fit(x.reshape(-1, 1), y)

    # Evaluate the fitted line on a dense grid covering the sampled x range.
    xfit = np.linspace(0, 10, num=1000)
    yfit = model.predict(xfit.reshape(-1, 1))

    # Second figure: the points with the fitted line drawn over them.
    plt.scatter(x, y)
    plt.plot(xfit, yfit)
    plt.show()
    

    这是我尝试通过 DataFrame 传入数据的代码：

    from datetime import datetime
    import pandas_datareader.data as web
    
    # Request JPM quotes from the 'iex' source for the window start..end.
    start = datetime(2017, 8, 1)
    end = datetime(2018, 7, 30)
    data_SP = web.DataReader('JPM', 'iex', start, end)
    
    # NOTE(review): `dates` is never assigned in this snippet, so this line
    # fails as written; it marks the value the question is asking how to obtain.
    y = dates # not sure how to get here?
    # NOTE(review): the frame loaded above is named `data_SP`; `data` is not
    # defined anywhere in this snippet.
    plt.scatter(data['close'], y);
    plt.show()
    
    from sklearn.linear_model import LinearRegression
    model = LinearRegression(fit_intercept=True)
    
    # Fit using the 'close' column, reshaped to one feature column, against y.
    model.fit(data['close'][:, np.newaxis], y)
    
    # NOTE(review): the 0..10 grid is carried over from the synthetic example;
    # presumably it should span the range of the real feature - confirm.
    xfit = np.linspace(0, 10, 1000)
    yfit = model.predict(xfit[:, np.newaxis])
    
    # Overlay the fitted line on the scatter of the same data.
    plt.scatter(data['close'], y)
    plt.plot(xfit, yfit);
    plt.show()
    
    3 回复  |  直到 6 年前
        1
  •  1
  •   user115215    6 年前

    import matplotlib.pyplot as plt
    import seaborn as sns; sns.set()
    import numpy as np
    from datetime import datetime
    import pandas as pd
    pd.core.common.is_list_like = pd.api.types.is_list_like
    from sklearn.linear_model import LinearRegression
    import pandas_datareader.data as web
    
    # Download JPM quotes from the 'iex' source for the requested window.
    start = datetime(2017, 8, 1)
    end = datetime(2018, 7, 30)
    data_SP = web.DataReader('JPM', 'iex', start, end)

    # Parse the index labels as "YYYY-MM-DD" dates, then express every date
    # as a whole number of days elapsed since `start` (the regression feature).
    dates = [datetime.strptime(label, "%Y-%m-%d") for label in data_SP.index]
    days_since = [(day - start).days for day in dates]

    # Single-column feature matrix and the one-period percent change of 'close'.
    day_column = np.array(days_since)[:, np.newaxis]
    returns = data_SP['close'].pct_change(1)

    # The first percent change is undefined (no previous row), so the first
    # observation is left out of the fit.
    model = LinearRegression(fit_intercept=True)
    model.fit(day_column[1:], returns[1:])

    # Predict over every date, including the one skipped during fitting.
    yfit = model.predict(day_column)

    plt.figure()
    plt.scatter(dates, yfit)
    plt.scatter(dates, returns)
    plt.xlabel('date')
    plt.ylabel('close')
    plt.show()
    

    percent change

        2
  •  1
  •   SamrajM    6 年前

    # Scatter the 'close' column against the frame's own index.
    plt.scatter(data_SP.index,data_SP['close'])
    

    import matplotlib.pyplot as plt
    import numpy as np
    from datetime import datetime
    # pandas has to be imported here: the alias assignment below reads `pd`,
    # which was otherwise undefined in this snippet.
    import pandas as pd
    # Presumably a compatibility shim for pandas_datareader; it is kept ahead
    # of that import so the alias already exists when the package loads.
    pd.core.common.is_list_like = pd.api.types.is_list_like
    from pandas_datareader import data, wb

    # Download JPM quotes from the 'iex' source for the requested window.
    start = datetime(2017, 8, 1)
    end = datetime(2018, 7, 30)
    data_SP = data.DataReader('JPM', 'iex', start, end)

    # Scatter the 'close' column against the frame's own index.
    plt.scatter(data_SP.index,data_SP['close'])
    

    dates changes link

    import datetime as dt
    # NOTE(review): `data_df` and `pd` are not defined in this fragment; it
    # assumes a frame with a 'Date' column already exists.
    # Parse the 'Date' column into datetime values ...
    data_df['Date'] = pd.to_datetime(data_df['Date'])
    # ... then replace each value with its integer ordinal via
    # datetime.toordinal (presumably so it can serve as a numeric feature).
    data_df['Date']=data_df['Date'].map(dt.datetime.toordinal)
    

        3
  •  0
  •   polonius11    6 年前

    import matplotlib.pyplot as plt
    import numpy as np
    from datetime import datetime
    import pandas
    from sklearn.linear_model import LinearRegression
    import pandas_datareader.data as pdr
    
    def close_price_trending(analysis):
        """Scatter one column of the processed data together with its linear trend.

        Reads the module-level ``days_since``, ``dates`` and
        ``data_sample_processed``. A linear regression of
        ``data_sample_processed[analysis]`` on ``days_since`` is fitted, and
        both the observed values and the fitted values are scattered against
        ``dates`` before the figure is shown.

        Args:
            analysis: Column label to select from ``data_sample_processed``.
        """
        trend = LinearRegression(fit_intercept=True)
        # One feature column: days elapsed, as required for a 2-D design matrix.
        day_column = np.array(days_since)[:, np.newaxis]
        observed = data_sample_processed[analysis]
        trend.fit(day_column, observed)
        fitted = trend.predict(day_column)

        plt.scatter(dates, observed)
        plt.scatter(dates, fitted)
        plt.xlabel('date')
        plt.ylabel('close')
        plt.show()
    
    def return_excess_benchmark1(analysis, benchmark):
        """Plot a column's cumulative growth minus a benchmark's, with a linear trend.

        Reads the module-level ``days_since``, ``dates`` and
        ``data_sample_processed``. For both columns the running product of
        ``1 + value`` is taken; the difference of the two products is
        scattered against ``dates`` together with a linear fit of that
        difference on ``days_since``.

        Args:
            analysis: Column label of the series under analysis.
            benchmark: Column label of the series it is compared against.
        """
        fig = plt.figure()
        ax = fig.add_subplot(111)
        fig.subplots_adjust(top=0.85)
        ax.set_title(f"{analysis} O/U {benchmark}")

        # Cumulative growth of each column, then the gap between the two.
        analysis_growth = (1 + data_sample_processed[analysis]).cumprod()
        benchmark_growth = (1 + data_sample_processed[benchmark]).cumprod()
        excess = analysis_growth - benchmark_growth
        plt.scatter(dates, excess)

        # Linear trend of the gap over elapsed days.
        day_column = np.array(days_since)[:, np.newaxis]
        trend = LinearRegression(fit_intercept=True)
        trend.fit(day_column, excess)
        fitted = trend.predict(day_column)
        plt.scatter(dates, fitted)

        plt.xlabel('date')
        plt.ylabel('close')
        fig.show()
    
    
    # Download and prepare the data.
    start = datetime(2015, 8, 1)
    end = datetime(2018, 7, 30)

    Symbol_List = ['GSLC', 'AGG', 'JPM','CAR', 'IVV', 'DSI', 'VTI']

    # One frame per symbol, with its 'close' column renamed to the symbol so
    # the columns stay distinguishable after the side-by-side concat.
    frames = []
    for ticker in Symbol_List:
        quotes = pdr.DataReader(ticker, 'iex', start, end)
        frames.append(quotes.rename(columns={'close': ticker}))
    data = pandas.concat(frames, axis=1)

    # Keep only the per-symbol columns and convert them to period-over-period
    # percent changes; the undefined first row becomes 0.
    data_sample = data[Symbol_List]
    data_sample_processed = data_sample.pct_change().fillna(0)

    # Parse the index labels as "YYYY-MM-DD" dates and count the days elapsed
    # since `start`; the plotting functions read both of these globals.
    dates = [datetime.strptime(label, "%Y-%m-%d") for label in data_sample_processed.index]
    days_since = [(day - start).days for day in dates]

    # Run the analysis: compare each chosen symbol against the benchmark.
    analysis_symbol_1 = 'DSI'
    analysis_symbol_2 = 'GSLC'
    benchmark_1 = 'VTI'

    for symbol in (analysis_symbol_1, analysis_symbol_2):
        return_excess_benchmark1(symbol, benchmark_1)