This script demonstrates Retrieval-Augmented Generation (RAG): the retriever fetches relevant documents for a question, and the model generates an answer that combines the retrieved information with its own parametric knowledge.
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration

# Load the three pretrained RAG components: the tokenizer (wraps both the
# question-encoder and generator tokenizers), the document retriever
# (use_dummy_dataset=True pulls a tiny demo index instead of the full
# Wikipedia index), and the sequence-level RAG generator wired to it.
rag_tokenizer = RagTokenizer.from_pretrained('facebook/rag-sequence-nq')
rag_retriever = RagRetriever.from_pretrained(
    'facebook/rag-sequence-nq',
    index_name='exact',
    use_dummy_dataset=True,
)
rag_model = RagSequenceForGeneration.from_pretrained(
    'facebook/rag-sequence-nq',
    retriever=rag_retriever,
)

# Tokenize the question into PyTorch tensors.
question = 'Who was the first person to walk on the moon?'
encoded = rag_tokenizer(question, return_tensors='pt')

# Retrieve supporting documents and generate an answer with beam search.
# Only input_ids and attention_mask are forwarded on purpose — the
# encoding may contain extra keys generate() does not accept.
generated_ids = rag_model.generate(
    input_ids=encoded['input_ids'],
    attention_mask=encoded['attention_mask'],
    num_beams=2,
    max_length=50,
)

# Decode the first (best) beam back into text and report the result.
best_answer = rag_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print('Question:', question)
print('Answer:', best_answer)