意图识别

  • 意图识别DEMO

        
      import pandas as pd
      import jieba
      from sklearn.feature_extraction.text import TfidfVectorizer
      from sklearn.linear_model import LogisticRegression
      from sklearn.pipeline import Pipeline
      from sklearn.metrics import classification_report, accuracy_score
      from sklearn.model_selection import train_test_split
        
      # 示例数据(假设已经有了标注好的意图数据)
      data = {
          'text': ["订一张去北京的机票",
                   "给我看看最新的智能手机型号",
                   "明天的天气怎么样?",
                   "你觉的天气怎么样?",
                    "python怎么样?",
                   "告诉我关于Python编程的信息"],
          'intent': ["flight_booking", 
                     "product_information", 
                     "weather_information", 
                     "weather_information",
                     "software_information",
                     "software_information"]
      }
        
      df = pd.DataFrame(data)
        
      # 分词函数(使用jieba分词)
      def chinese_tokenizer(text): 
          return list(jieba.cut(text))
        
      # 划分训练集和测试集
      train_texts, test_texts, train_labels, test_labels = train_test_split(df['text'], df['intent'], test_size=0.2, random_state=42)
        
      # 定义TF-IDF向量化器和分类器(这里使用逻辑回归作为分类器),注意不指定token_pattern参数
      tfidf_vectorizer = TfidfVectorizer(tokenizer=chinese_tokenizer)
      classifier = LogisticRegression(max_iter=1000)
        
      # 构建管道
      model = Pipeline([
          ('tfidf', tfidf_vectorizer),
          ('clf', classifier),
      ])
        
      # 训练模型
      model.fit(train_texts, train_labels)
        
      # 在测试集上评估模型
      predictions = model.predict(test_texts)
        
      print("Accuracy:", accuracy_score(test_labels, predictions))
      print("\nClassification Report:")
      print(classification_report(test_labels, predictions))
        
      # 准备测试数据
      test_data = ["预定一间去上海的酒店",
                   "我如何学习机器学习?",
                   "今天北京的温度是多少?"]
        
      # 预测测试数据的意图
      predictions = model.predict(test_data)
        
      # 输出预测结果
      for text, intent in zip(test_data, predictions):
          print(f"Text: {text} | Predicted Intent: {intent}")
        
    

results matching ""

    No results matching ""