Prompt Engineering / RAG / Data Ingestion Use Cases
QA Generator: use an LLM to generate question-and-answer pairs for a conversational Q&A chatbot

%load_ext autoreload
%autoreload 2
import boto3
import urllib.request
import json
import math
from utils import helper
bedrock_runtime = boto3.client("bedrock-runtime")
model_id = "anthropic.claude-3-haiku-20240307-v1:0"
target_url = "https://www.gutenberg.org/ebooks/64317.txt.utf-8"  # The Great Gatsby
data = urllib.request.urlopen(target_url)
my_texts = []
for line in data:
    my_texts.append(line.decode())
doc_size = 700  # number of lines per batch; determines how many batches are processed
batches = math.ceil(len(my_texts) / doc_size)
start = 0
data_samples = {}
data_samples['question'] = []
data_samples['ground_truth'] = []
for batch in range(batches):
    # Join the next doc_size lines into a single text chunk
    batch_text_arr = my_texts[start:start + doc_size]
    batch_text = "".join(batch_text_arr)
    start += doc_size
    # Ask the model to generate question/ground-truth pairs from this chunk
    ds = helper.generate_questions(bedrock_runtime, model_id, batch_text)
    data_samples['question'].extend(ds['question'])
    data_samples['ground_truth'].extend(ds['ground_truth'])
with open("data/qa_samples.json", "w") as f:
    f.write(json.dumps(data_samples))
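
The helper.generate_questions function comes from utils/helper.py and is not shown in this notebook. For reference, a minimal sketch of what such a helper might look like is below, assuming the Bedrock Messages API via invoke_model and a prompt that asks the model to return a JSON object with question and ground_truth lists; the prompt wording, the number of pairs per chunk, and the response parsing are all assumptions, and the real helper may differ.

import json

def generate_questions(bedrock_runtime, model_id, text):
    # Hypothetical sketch: prompt the model for Q&A pairs grounded in `text`
    # and return a dict with 'question' and 'ground_truth' lists.
    prompt = (
        "Generate 5 question-and-answer pairs based strictly on the text below. "
        "Respond with only a JSON object of the form "
        '{"question": ["..."], "ground_truth": ["..."]}.\n\n'
        f"<text>\n{text}\n</text>"
    )
    body = json.dumps({
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 2048,
        "messages": [{"role": "user", "content": [{"type": "text", "text": prompt}]}],
    })
    response = bedrock_runtime.invoke_model(modelId=model_id, body=body)
    completion = json.loads(response["body"].read())["content"][0]["text"]
    # Assumes the model returns valid JSON; production code should handle parse errors
    return json.loads(completion)

If the model wraps the JSON in extra prose, a stricter instruction in the prompt or a retry on parse failure helps keep the output machine-readable.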