此页面已过时,在 DSPy 2.5 和 2.6 中可能不完全准确
DSPy 速查表
此页面将包含常见使用模式的代码片段。
DSPy 数据加载器
导入并初始化 DataLoader 对象
从 HuggingFace 数据集加载
你可以通过调用对应 split 的 key 来访问该 split 的数据集
从 HuggingFace 加载特定数据集划分
你也可以手动指定想要包含的 split 作为参数,这将返回一个字典,其中 key 就是你指定的 split
code_alpaca = dl.from_huggingface(
"HuggingFaceH4/CodeAlpaca_20K",
split = ["train", "test"],
)
print(f"Splits in dataset: {code_alpaca.keys()}")
如果你只指定一个 split,则 dataloader 将返回 dspy.Example
列表而不是字典
code_alpaca = dl.from_huggingface(
"HuggingFaceH4/CodeAlpaca_20K",
split = "train",
)
print(f"Number of examples in split: {len(code_alpaca)}")
你也可以像处理 HuggingFace 数据集一样对 split 进行切片
code_alpaca_80 = dl.from_huggingface(
"HuggingFaceH4/CodeAlpaca_20K",
split = "train[:80%]",
)
print(f"Number of examples in split: {len(code_alpaca_80)}")
code_alpaca_20_80 = dl.from_huggingface(
"HuggingFaceH4/CodeAlpaca_20K",
split = "train[20%:80%]",
)
print(f"Number of examples in split: {len(code_alpaca_20_80)}")
从 HuggingFace 加载特定子集
如果数据集包含子集,你可以像在 HuggingFace 中使用 load_dataset
一样将其作为参数传递
gms8k = dl.from_huggingface(
"gsm8k",
"main",
input_keys = ("question",),
)
print(f"Keys present in the returned dict: {list(gms8k.keys())}")
print(f"Number of examples in train set: {len(gms8k['train'])}")
print(f"Number of examples in test set: {len(gms8k['test'])}")
从 CSV 加载
你可以通过在参数中指定来仅选择 csv 中的特定列
dolly_100_dataset = dl.from_csv(
"dolly_subset_100_rows.csv",
fields=("instruction", "context", "response"),
input_keys=("instruction", "context")
)
分割 dspy.Example
列表
splits = dl.train_test_split(dataset, train_size=0.8) # `dataset` is a List of dspy.Example
train_dataset = splits['train']
test_dataset = splits['test']
从 dspy.Example
列表采样
DSPy 程序
dspy.Signature
class BasicQA(dspy.Signature):
"""Answer questions with short factoid answers."""
question = dspy.InputField()
answer = dspy.OutputField(desc="often between 1 and 5 words")
dspy.ChainOfThought
generate_answer = dspy.ChainOfThought(BasicQA)
# Call the predictor on a particular input alongside a hint.
question='What is the color of the sky?'
pred = generate_answer(question=question)
dspy.ChainOfThoughtwithHint
generate_answer = dspy.ChainOfThoughtWithHint(BasicQA)
# Call the predictor on a particular input alongside a hint.
question='What is the color of the sky?'
hint = "It's what you often see during a sunny day."
pred = generate_answer(question=question, hint=hint)
dspy.ProgramOfThought
pot = dspy.ProgramOfThought(BasicQA)
question = 'Sarah has 5 apples. She buys 7 more apples from the store. How many apples does Sarah have now?'
result = pot(question=question)
print(f"Question: {question}")
print(f"Final Predicted Answer (after ProgramOfThought process): {result.answer}")
dspy.ReACT
react_module = dspy.ReAct(BasicQA)
question = 'Sarah has 5 apples. She buys 7 more apples from the store. How many apples does Sarah have now?'
result = react_module(question=question)
print(f"Question: {question}")
print(f"Final Predicted Answer (after ReAct process): {result.answer}")
dspy.Retrieve
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
dspy.settings.configure(rm=colbertv2_wiki17_abstracts)
#Define Retrieve Module
retriever = dspy.Retrieve(k=3)
query='When was the first FIFA World Cup held?'
# Call the retriever on a particular query.
topK_passages = retriever(query).passages
for idx, passage in enumerate(topK_passages):
print(f'{idx+1}]', passage, '\n')
DSPy 指标
将函数用作指标
要创建自定义指标,你可以创建一个返回数字或布尔值的函数
def parse_integer_answer(answer, only_first_line=True):
try:
if only_first_line:
answer = answer.strip().split('\n')[0]
# find the last token that has a number in it
answer = [token for token in answer.split() if any(c.isdigit() for c in token)][-1]
answer = answer.split('.')[0]
answer = ''.join([c for c in answer if c.isdigit()])
answer = int(answer)
except (ValueError, IndexError):
# print(answer)
answer = 0
return answer
# Metric Function
def gsm8k_metric(gold, pred, trace=None) -> int:
return int(parse_integer_answer(str(gold.answer))) == int(parse_integer_answer(str(pred.answer)))
将 LLM 用作评判者
class FactJudge(dspy.Signature):
"""Judge if the answer is factually correct based on the context."""
context = dspy.InputField(desc="Context for the prediction")
question = dspy.InputField(desc="Question to be answered")
answer = dspy.InputField(desc="Answer for the question")
factually_correct = dspy.OutputField(desc="Is the answer factually correct based on the context?", prefix="Factual[Yes/No]:")
judge = dspy.ChainOfThought(FactJudge)
def factuality_metric(example, pred):
factual = judge(context=example.context, question=example.question, answer=pred.answer)
return int(factual=="Yes")
DSPy 评估
from dspy.evaluate import Evaluate
evaluate_program = Evaluate(devset=devset, metric=your_defined_metric, num_threads=NUM_THREADS, display_progress=True, display_table=num_rows_to_display)
evaluate_program(your_dspy_program)
DSPy 优化器
LabeledFewShot
from dspy.teleprompt import LabeledFewShot
labeled_fewshot_optimizer = LabeledFewShot(k=8)
your_dspy_program_compiled = labeled_fewshot_optimizer.compile(student = your_dspy_program, trainset=trainset)
BootstrapFewShot
from dspy.teleprompt import BootstrapFewShot
fewshot_optimizer = BootstrapFewShot(metric=your_defined_metric, max_bootstrapped_demos=4, max_labeled_demos=16, max_rounds=1, max_errors=5)
your_dspy_program_compiled = fewshot_optimizer.compile(student = your_dspy_program, trainset=trainset)
使用另一个 LM 进行编译,在 teacher_settings 中指定
from dspy.teleprompt import BootstrapFewShot
fewshot_optimizer = BootstrapFewShot(metric=your_defined_metric, max_bootstrapped_demos=4, max_labeled_demos=16, max_rounds=1, max_errors=5, teacher_settings=dict(lm=gpt4))
your_dspy_program_compiled = fewshot_optimizer.compile(student = your_dspy_program, trainset=trainset)
编译已编译的程序 - 引导已引导的程序
your_dspy_program_compiledx2 = teleprompter.compile(
your_dspy_program,
teacher=your_dspy_program_compiled,
trainset=trainset,
)
保存/加载已编译的程序
BootstrapFewShotWithRandomSearch
有关 BootstrapFewShotWithRandomSearch 的详细文档可以在此处找到。
from dspy.teleprompt import BootstrapFewShotWithRandomSearch
fewshot_optimizer = BootstrapFewShotWithRandomSearch(metric=your_defined_metric, max_bootstrapped_demos=2, num_candidate_programs=8, num_threads=NUM_THREADS)
your_dspy_program_compiled = fewshot_optimizer.compile(student = your_dspy_program, trainset=trainset, valset=devset)
其他自定义配置类似于自定义 BootstrapFewShot
优化器。
Ensemble
from dspy.teleprompt import BootstrapFewShotWithRandomSearch
from dspy.teleprompt.ensemble import Ensemble
fewshot_optimizer = BootstrapFewShotWithRandomSearch(metric=your_defined_metric, max_bootstrapped_demos=2, num_candidate_programs=8, num_threads=NUM_THREADS)
your_dspy_program_compiled = fewshot_optimizer.compile(student = your_dspy_program, trainset=trainset, valset=devset)
ensemble_optimizer = Ensemble(reduce_fn=dspy.majority)
programs = [x[-1] for x in your_dspy_program_compiled.candidate_programs]
your_dspy_program_compiled_ensemble = ensemble_optimizer.compile(programs[:3])
BootstrapFinetune
from dspy.teleprompt import BootstrapFewShotWithRandomSearch, BootstrapFinetune
#Compile program on current dspy.settings.lm
fewshot_optimizer = BootstrapFewShotWithRandomSearch(metric=your_defined_metric, max_bootstrapped_demos=2, num_threads=NUM_THREADS)
your_dspy_program_compiled = tp.compile(your_dspy_program, trainset=trainset[:some_num], valset=trainset[some_num:])
#Configure model to finetune
config = dict(target=model_to_finetune, epochs=2, bf16=True, bsize=6, accumsteps=2, lr=5e-5)
#Compile program on BootstrapFinetune
finetune_optimizer = BootstrapFinetune(metric=your_defined_metric)
finetune_program = finetune_optimizer.compile(your_dspy_program, trainset=some_new_dataset_for_finetuning_model, **config)
finetune_program = your_dspy_program
#Load program and activate model's parameters in program before evaluation
ckpt_path = "saved_checkpoint_path_from_finetuning"
LM = dspy.HFModel(checkpoint=ckpt_path, model=model_to_finetune)
for p in finetune_program.predictors():
p.lm = LM
p.activated = False
COPRO
有关 COPRO 的详细文档可以在此处找到。
from dspy.teleprompt import COPRO
eval_kwargs = dict(num_threads=16, display_progress=True, display_table=0)
copro_teleprompter = COPRO(prompt_model=model_to_generate_prompts, metric=your_defined_metric, breadth=num_new_prompts_generated, depth=times_to_generate_prompts, init_temperature=prompt_generation_temperature, verbose=False)
compiled_program_optimized_signature = copro_teleprompter.compile(your_dspy_program, trainset=trainset, eval_kwargs=eval_kwargs)
MIPRO
from dspy.teleprompt import MIPRO
teleprompter = MIPRO(prompt_model=model_to_generate_prompts, task_model=model_that_solves_task, metric=your_defined_metric, num_candidates=num_new_prompts_generated, init_temperature=prompt_generation_temperature)
kwargs = dict(num_threads=NUM_THREADS, display_progress=True, display_table=0)
compiled_program_optimized_bayesian_signature = teleprompter.compile(your_dspy_program, trainset=trainset, num_trials=100, max_bootstrapped_demos=3, max_labeled_demos=5, eval_kwargs=kwargs)
MIPROv2
注意:详细文档可以在此处找到。MIPROv2
是 MIPRO
的最新扩展,包括以下更新:(1) 改进了指令建议,以及 (2) 使用小批量处理提高了搜索效率。
使用 MIPROv2 进行优化
这展示了如何使用 auto=light
进行简单的开箱即用运行,它会为你配置许多超参数并执行轻量级优化运行。你也可以将 auto
设置为 medium
或 heavy
以执行更长时间的优化运行。有关 MIPROv2
的详细文档可以在此处找到,其中还提供了更多关于如何手动设置超参数的信息。
# Import the optimizer
from dspy.teleprompt import MIPROv2
# Initialize optimizer
teleprompter = MIPROv2(
metric=gsm8k_metric,
auto="light", # Can choose between light, medium, and heavy optimization runs
)
# Optimize program
print(f"Optimizing program with MIPRO...")
optimized_program = teleprompter.compile(
program.deepcopy(),
trainset=trainset,
max_bootstrapped_demos=3,
max_labeled_demos=4,
requires_permission_to_run=False,
)
# Save optimize program for future use
optimized_program.save(f"mipro_optimized")
# Evaluate optimized program
print(f"Evaluate optimized program...")
evaluate(optimized_program, devset=devset[:])
仅使用 MIPROv2 优化指令 (0-Shot)
# Import the optimizer
from dspy.teleprompt import MIPROv2
# Initialize optimizer
teleprompter = MIPROv2(
metric=gsm8k_metric,
auto="light", # Can choose between light, medium, and heavy optimization runs
)
# Optimize program
print(f"Optimizing program with MIPRO...")
optimized_program = teleprompter.compile(
program.deepcopy(),
trainset=trainset,
max_bootstrapped_demos=0,
max_labeled_demos=0,
requires_permission_to_run=False,
)
# Save optimize program for future use
optimized_program.save(f"mipro_optimized")
# Evaluate optimized program
print(f"Evaluate optimized program...")
evaluate(optimized_program, devset=devset[:])
带类型的 Signature 优化器
from dspy.teleprompt.signature_opt_typed import optimize_signature
from dspy.evaluate.metrics import answer_exact_match
from dspy.functional import TypedChainOfThought
compiled_program = optimize_signature(
student=TypedChainOfThought("question -> answer"),
evaluator=Evaluate(devset=devset, metric=answer_exact_match, num_threads=10, display_progress=True),
n_iterations=50,
).program
KNNFewShot
from sentence_transformers import SentenceTransformer
from dspy import Embedder
from dspy.teleprompt import KNNFewShot
from dspy import ChainOfThought
knn_optimizer = KNNFewShot(k=3, trainset=trainset, vectorizer=Embedder(SentenceTransformer("all-MiniLM-L6-v2").encode))
qa_compiled = knn_optimizer.compile(student=ChainOfThought("question -> answer"))
BootstrapFewShotWithOptuna
from dspy.teleprompt import BootstrapFewShotWithOptuna
fewshot_optuna_optimizer = BootstrapFewShotWithOptuna(metric=your_defined_metric, max_bootstrapped_demos=2, num_candidate_programs=8, num_threads=NUM_THREADS)
your_dspy_program_compiled = fewshot_optuna_optimizer.compile(student=your_dspy_program, trainset=trainset, valset=devset)
其他自定义配置类似于自定义 dspy.BootstrapFewShot
优化器。
DSPy Refine
和 BestofN
在 DSPy 2.6 中,
dspy.Suggest
和dspy.Assert
已被dspy.Refine
和dspy.BestofN
替代。
BestofN
使用不同的温度运行模块最多 N
次,并返回由 reward_fn
定义的最佳预测,或第一个通过 threshold
的预测。
import dspy
qa = dspy.ChainOfThought("question -> answer")
def one_word_answer(args, pred):
return 1.0 if len(pred.answer) == 1 else 0.0
best_of_3 = dspy.BestOfN(module=qa, N=3, reward_fn=one_word_answer, threshold=1.0)
best_of_3(question="What is the capital of Belgium?").answer
# Brussels
Refine
使用不同的温度运行模块最多 N
次来优化它,并返回由 reward_fn
定义的最佳预测,或第一个通过 threshold
的预测。在每次尝试后(最后一次除外),Refine
会自动生成关于模块性能的详细反馈,并将此反馈用作后续运行的提示,从而创建一个迭代优化过程。
import dspy
qa = dspy.ChainOfThought("question -> answer")
def one_word_answer(args, pred):
return 1.0 if len(pred.answer) == 1 else 0.0
best_of_3 = dspy.Refine(module=qa, N=3, reward_fn=one_word_answer, threshold=1.0)
best_of_3(question="What is the capital of Belgium?").answer
# Brussels
错误处理
默认情况下,Refine
会尝试运行模块最多 N 次,直到达到阈值。如果模块遇到错误,它将继续尝试,最多进行 N 次失败尝试。你可以通过将 fail_count
设置为小于 N
的数字来改变此行为。
refine = dspy.Refine(module=qa, N=3, reward_fn=one_word_answer, threshold=1.0, fail_count=1)
...
refine(question="What is the capital of Belgium?")
# If we encounter just one failed attempt, the module will raise an error.
如果你想运行模块最多 N 次而不进行任何错误处理,可以将 fail_count
设置为 N
。这是默认行为。