# config.yml

# Reference: https://ptorch.com/news/243.html

# Model configuration reference: https://rasa.com/docs/rasa/model-configuration/
recipe: default.v1

# Unique identifier of this assistant project.
# The placeholder value below must be replaced with an assistant name that is
# unique within your deployment.
assistant_id: placeholder_default

# Language of the training data (the original note listed en and zh as options).
# It has to be zh for this file: the pipeline below tokenizes with
# JiebaTokenizer and loads bert-base-chinese, and JiebaTokenizer only supports
# Chinese. Switch back to en only together with a whitespace-based pipeline.
language: zh

# NLU pipeline for Chinese input. Components run in the order listed, and each
# one can only use the output of the components above it.
# See https://rasa.com/docs/rasa/tuning-your-model for more information.
pipeline:
  # Chinese word segmentation.
  - name: JiebaTokenizer
  # Dense features from a pretrained BERT model using Chinese weights.
  - name: LanguageModelFeaturizer
    model_name: "bert"
    model_weight: "bert-base-chinese"
  # Regex / lookup-table features.
  - name: RegexFeaturizer
    # Chinese does not separate words with whitespace, so patterns must not
    # be anchored on word boundaries (the component default is true).
    use_word_boundaries: false
  - name: DIETClassifier
    # Number of training epochs (the original note lists 50, 100 and 1000 as
    # candidates). More epochs fit the training data more closely, which is
    # not the same as generalizing better.
    epochs: 100
    learning_rate: 0.001
    constrain_similarities: true
    # tensorboard_log_directory: ./log
  # Response selector.
  - name: ResponseSelector
    epochs: 100
    learning_rate: 0.001
    constrain_similarities: true
  # Intent-level fallback classifier.
  - name: FallbackClassifier
    # Minimum intent confidence; below it the intent is replaced by
    # nlu_fallback (default 0.3; the original note also mentions 0.6).
    threshold: 0.3
    # If the two highest-ranked intents score within 0.1 of each other the
    # prediction is treated as ambiguous and replaced by nlu_fallback.
    ambiguity_threshold: 0.1
  # Entity synonym mapper.
  - name: EntitySynonymMapper

# Alternative whitespace-tokenizer pipeline (described by the original notes
# as the default pipeline), kept for reference. Uncomment and adjust it to use
# it instead of the entries above.
#   - name: WhitespaceTokenizer
#   - name: RegexFeaturizer
#   - name: LexicalSyntacticFeaturizer
#   - name: CountVectorsFeaturizer
#   - name: CountVectorsFeaturizer
#     analyzer: char_wb     # character n-grams inside word boundaries
#     min_ngram: 1          # smallest n-gram size
#     max_ngram: 4          # largest n-gram size
#   - name: DIETClassifier
#     epochs: 100
#     constrain_similarities: true

# Dialogue-management (Rasa Core) configuration.
# Policy reference: https://rasa.com/docs/rasa/core/policies/
# See https://rasa.com/docs/rasa/policies for more information.
policies:
  # Memoization policy.
  - name: MemoizationPolicy
  # Rule policy; the original note marks it as providing form support.
  - name: RulePolicy
    # Action-level fallback: when the confidence of the predicted next action
    # is below 0.3, the fallback action named below is run instead.
    # NOTE(review): the original note also mentions falling back when the two
    # best actions score within 0.001 of each other; no key in this block sets
    # such a margin - confirm against the RulePolicy documentation.
    core_fallback_threshold: 0.3
    core_fallback_action_name: "action_default_fallback"
    enable_fallback_prediction: true

  # Disabled; uncomment to enable.
  #   - name: UnexpecTEDIntentPolicy
  #     max_history: 5   # maximum number of history turns
  #     epochs: 100      # number of training epochs
  - name: TEDPolicy
    max_history: 5
    epochs: 100
    constrain_similarities: true

---
# config.yml for Chinese-language dialogue

# Recipe identifier for this configuration.
recipe: default.v1
# Training-data language: Chinese.
language: zh
# NLU pipeline for Chinese. Components run in the order listed and a component
# only receives the output of the components above it, so every featurizer is
# placed ahead of the classifiers that should consume its features.
pipeline:
  # Tokenizer selection: Jieba word segmentation.
  - name: JiebaTokenizer
  # Dense features from a pretrained BERT model using Chinese weights.
  - name: LanguageModelFeaturizer
    model_name: "bert"
    model_weight: "bert-base-chinese"
  # Optional additional featurizers (marked "can be added" in the original
  # notes). They must precede DIETClassifier to have any effect on it.
  # Regex features used for entity extraction and intent classification.
  - name: RegexFeaturizer
    # Chinese does not separate words with whitespace, so patterns must not
    # be anchored on word boundaries.
    use_word_boundaries: false
  # Lexical and syntactic features.
  - name: LexicalSyntacticFeaturizer
  # Character n-gram counts (1 to 4 characters, within word boundaries).
  - name: CountVectorsFeaturizer
    analyzer: char_wb
    min_ngram: 1
    max_ngram: 4
  - name: DIETClassifier
    epochs: 100
    learning_rate: 0.001
    constrain_similarities: true
  - name: EntitySynonymMapper
  # Handles retrieval intents, whose format differs from ordinary intents.
  - name: ResponseSelector
    epochs: 100
    constrain_similarities: true
  # Replaces low-confidence or ambiguous intent predictions with a fallback.
  - name: FallbackClassifier
    threshold: 0.3
    ambiguity_threshold: 0.1
# Dialogue policies; no options are set here, so each policy runs with its
# built-in defaults.
policies:
  # Memoization policy.
  - name: MemoizationPolicy
  # TED policy.
  - name: TEDPolicy
  # Rule policy.
  - name: RulePolicy