QA Generator

Use an LLM to generate question-and-answer pairs for a Q&A conversational chatbot.

%load_ext autoreload
%autoreload 2
import boto3
import urllib.request
import math
from utils import helper
# Bedrock runtime client used for every model invocation below.
bedrock_runtime = boto3.client("bedrock-runtime")
model_id = "anthropic.claude-3-haiku-20240307-v1:0"

# Source document: "The Great Gatsby" from Project Gutenberg (plain-text, UTF-8).
target_url = "https://www.gutenberg.org/ebooks/64317.txt.utf-8" # the great gatsby

# Download the book line by line. The context manager guarantees the HTTP
# response is closed even on error (the original leaked the connection),
# and the decode encoding is stated explicitly for clarity.
with urllib.request.urlopen(target_url) as data:
    my_texts = [line.decode("utf-8") for line in data]
doc_size = 700  # number of text lines per batch sent to the model

# Number of batches; kept because it is displayed at the end of the script.
batches = math.ceil(len(my_texts) / doc_size)

# Accumulates questions and their reference answers across all batches.
data_samples = {"question": [], "ground_truth": []}

# Walk the book in doc_size-line windows. range() with a step replaces the
# manual start/batch-counter bookkeeping; the final slice simply comes up
# short when the last batch is partial.
for start in range(0, len(my_texts), doc_size):
    batch_text = "".join(my_texts[start:start + doc_size])
    # helper.generate_questions is expected to return a dict with
    # 'question' and 'ground_truth' lists — TODO confirm against utils.helper.
    ds = helper.generate_questions(bedrock_runtime, model_id, batch_text)
    data_samples['question'].extend(ds['question'])
    data_samples['ground_truth'].extend(ds['ground_truth'])
data_samples  # notebook display of the collected Q&A pairs (no-op in a plain script)

import json

# Persist the samples. json.dump streams directly to the file handle
# (no intermediate string), and the explicit encoding plus
# ensure_ascii=False keeps the output readable and platform-independent.
# NOTE(review): assumes the "data/" directory already exists — confirm.
with open("data/qa_samples.json", "w", encoding="utf-8") as f:
    json.dump(data_samples, f, ensure_ascii=False)

batches  # notebook display of how many batches were processed