代码之家  ›  专栏  ›  技术社区  ›  Mouna Camelia Hammoudi

在SVC预测中使用概率时获取错误:AttributeError:'努比·恩达雷'object没有属性'lower'

  •  0
  • Mouna Camelia Hammoudi  · 技术社区  · 5 年前

    AttributeError: 'numpy.ndarray' object has no attribute 'lower'
    

    执行行时 text_clf.fit(X_train,y_train) 我的密码。如何修正这一点,并得到我的预测是正确的概率使用支持向量机?

    我根据其余列的值预测输入文件的第一列(gold)。我的输入文件 dataExtended.txt 在以下表格下:

       gold,T-x-T,T-x-N,T-x-U,T-x-NT,T-x-UT,T-x-UN,T-x-UNT,N-x-T,N-x-N,N-x-U,N-x-NT,N-x-UT,N-x-UN,N-x-UNT,U-x-T,U-x-N,U-x-U,U-x-NT,U-x-UT,U-x-UN,U-x-UNT,NT-x-T,NT-x-N,NT-x-U,NT-x-NT,NT-x-UT,NT-x-UN,NT-x-UNT,UT-x-T,UT-x-N,UT-x-U,UT-x-NT,UT-x-UT,UT-x-UN,UT-x-UNT,UN-x-T,UN-x-N,UN-x-U,UN-x-NT,UN-x-UT,UN-x-UN,UN-x-UNT,UNT-x-T,UNT-x-N,UNT-x-U,UNT-x-NT,UNT-x-UT,UNT-x-UN,UNT-x-UNT,T-T-x,T-N-x,T-U-x,T-NT-x,T-UT-x,T-UN-x,T-UNT-x,N-T-x,N-N-x,N-U-x,N-NT-x,N-UT-x,N-UN-x,N-UNT-x,U-T-x,U-N-x,U-U-x,U-NT-x,U-UT-x,U-UN-x,U-UNT-x,NT-T-x,NT-N-x,NT-U-x,NT-NT-x,NT-UT-x,NT-UN-x,NT-UNT-x,UT-T-x,UT-N-x,UT-U-x,UT-NT-x,UT-UT-x,UT-UN-x,UT-UNT-x,UN-T-x,UN-N-x,UN-U-x,UN-NT-x,UN-UT-x,UN-UN-x,UN-UNT-x,UNT-T-x,UNT-N-x,UNT-U-x,UNT-NT-x,UNT-UT-x,UNT-UN-x,UNT-UNT-x,x-T-T,x-T-N,x-T-U,x-T-NT,x-T-UT,x-T-UN,x-T-UNT,x-N-T,x-N-N,x-N-U,x-N-NT,x-N-UT,x-N-UN,x-N-UNT,x-U-T,x-U-N,x-U-U,x-U-NT,x-U-UT,x-U-UN,x-U-UNT,x-NT-T,x-NT-N,x-NT-U,x-NT-NT,x-NT-UT,x-NT-UN,x-NT-UNT,x-UT-T,x-UT-N,x-UT-U,x-UT-NT,x-UT-UT,x-UT-UN,x-UT-UNT,x-UN-T,x-UN-N,x-UN-U,x-UN-NT,x-UN-UT,x-UN-UN,x-UN-UNT,x-UNT-T,x-UNT-N,x-UNT-U,x-UNT-NT,x-UNT-UT,x-UNT-UN,x-UNT-UNT,callersAtLeast1T,CalleesAtLeast1T,callersAllT,calleesAllT,CallersAtLeast1N,CalleesAtLeast1N,CallersAllN,CalleesAllN,childrenAtLeast1T,parentsAtLeast1T,childrenAtLeast1N,parentsAtLeast1N,childrenAllT,parentsAllT,childrenAllN,ParentsAllN,ParametersatLeast1T,FieldMethodsAtLeast1T,ReturnTypeAtLeast1T,ParametersAtLeast1N,FieldMethodsAtLeast1N,ReturnTypeN,ParametersAllT,FieldMethodsAllT,ParametersAllN,FieldMethodsAllN,ClassGoldN,ClassGoldT,Inner,Leaf,Root,Isolated,EmptyCallers,EmptyCallees,EmptyCallersCallers,EmptyCalleesCallees,Program,Requirement,MethodID
    T,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,chess,1,1
    N,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,chess,2,1
    N,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,chess,3,1
    N,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,chess,4,1
    N,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,chess,5,1
    N,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,chess,6,1
    N,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,chess,7,1
    N,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,chess,8,1
    N,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,chess,1,3
    N,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,chess,2,3
    N,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,chess,3,3
    N,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,chess,4,3
    N,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,chess,5,3
    

    # Make Predictions with Naive Bayes On The Iris Dataset
    from sklearn.cross_validation import train_test_split 
    from sklearn import metrics
    import pandas as pd 
    import numpy as np
    import seaborn as sns; sns.set()
    from sklearn.metrics import confusion_matrix 
    from sklearn.metrics import accuracy_score 
    from sklearn.metrics import classification_report 
    import seaborn as sns
    from sklearn import svm
    from sklearn.svm import LinearSVC
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.pipeline import Pipeline
    
    
    
    data = pd.read_csv( 'dataExtended.txt', sep= ',') 
    row_count, column_count = data.shape
    
        # Printing the dataswet shape 
    print ("Dataset Length: ", len(data)) 
    print ("Dataset Shape: ", data.shape) 
    print("Number of columns ", column_count)
    
        # Printing the dataset obseravtions 
    print ("Dataset: ",data.head()) 
    data['gold'] = data['gold'].astype('category').cat.codes
    data['Program'] = data['Program'].astype('category').cat.codes
        # Building Phase Separating the target variable 
    X = data.values[:, 1:column_count] 
    Y = data.values[:, 0] 
    
        # Splitting the dataset into train and test 
    X_train, X_test, y_train, y_test = train_test_split( 
    X, Y, test_size = 0.3, random_state = 100) 
        #Create a svm Classifier
    svclassifier = svm.LinearSVC()
    
    print('Before fitting')
    svclassifier.fit(X_train, y_train)
    predicted = svclassifier.predict(X_test)
    
    
    text_clf = Pipeline([('tfidf',TfidfVectorizer()),('clf',LinearSVC())])
    text_clf.fit(X_train,y_train)
    

    导致回溯的错误:

    Traceback (most recent call last):
    
      File "<ipython-input-9-8e85a0a9f81c>", line 1, in <module>
        runfile('C:/Users/mouna/ownCloud/Mouna Hammoudi/dumps/Python/Paper4SVM.py', wdir='C:/Users/mouna/ownCloud/Mouna Hammoudi/dumps/Python')
    
      File "C:\Users\mouna\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 668, in runfile
        execfile(filename, namespace)
    
      File "C:\Users\mouna\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 108, in execfile
        exec(compile(f.read(), filename, 'exec'), namespace)
    
      File "C:/Users/mouna/ownCloud/Mouna Hammoudi/dumps/Python/Paper4SVM.py", line 53, in <module>
        text_clf.fit(X_train,y_train)
    
      File "C:\Users\mouna\Anaconda3\lib\site-packages\sklearn\pipeline.py", line 248, in fit
        Xt, fit_params = self._fit(X, y, **fit_params)
    
      File "C:\Users\mouna\Anaconda3\lib\site-packages\sklearn\pipeline.py", line 213, in _fit
        **fit_params_steps[name])
    
      File "C:\Users\mouna\Anaconda3\lib\site-packages\sklearn\externals\joblib\memory.py", line 362, in __call__
        return self.func(*args, **kwargs)
    
      File "C:\Users\mouna\Anaconda3\lib\site-packages\sklearn\pipeline.py", line 581, in _fit_transform_one
        res = transformer.fit_transform(X, y, **fit_params)
    
      File "C:\Users\mouna\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py", line 1381, in fit_transform
        X = super(TfidfVectorizer, self).fit_transform(raw_documents)
    
      File "C:\Users\mouna\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py", line 869, in fit_transform
        self.fixed_vocabulary_)
    
      File "C:\Users\mouna\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py", line 792, in _count_vocab
        for feature in analyze(doc):
    
      File "C:\Users\mouna\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py", line 266, in <lambda>
        tokenize(preprocess(self.decode(doc))), stop_words)
    
      File "C:\Users\mouna\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py", line 232, in <lambda>
        return lambda x: strip_accents(x.lower())
    
    0 回复  |  直到 5 年前
        1
  •  2
  •   desertnaut SKZI    5 年前

    你呢 不能 对数值数据使用TF-IDF相关方法;该方法专门用于 文本 .tolower() ,默认情况下适用于字符串,因此出现错误。这已经从 documentation :

    fit

    从训练集中学习词汇和idf。

    原始文件:

    恐怕您的理由,正如评论中所解释的那样:

    我只是想知道每个预测都是正确的,TF-IDF似乎是使用SVM时唯一的方法

    非常虚弱。首先,没有这样的事情 每个预测是正确的概率 “-我认为你是说 概率预测 Predict classes or class probabilities? )

    为了达到你的实际需求:对比 LinearSVC SVC 确实提供了 predict_proba docs 实际上是一个支持向量机-请参阅中的答案 Under what parameters are SVC and LinearSVC in scikit-learn equivalent? 了解详情。