import cProfile
import pstats
import time

import torch
from transformers import pipeline

torch.set_num_threads(24)  # Adjust this to the number of threads/cores you have

# Initialize the text-generation pipeline on CPU
generator = pipeline('text-generation', model='gpt2', device_map="cpu")

def run_inference():
    t_start = time.time()
    # Generate text from a fixed prompt
    generated_text = generator(
        "below is a simple python function to extract email addresses from a string:",
        max_length=500,
        num_return_sequences=1,
    )
    # Print the generated text and the elapsed wall-clock time
    print(generated_text[0]['generated_text'])
    print("took %.3fs" % (time.time() - t_start))

# Profile the inference run and save the raw stats to disk
cProfile.run('run_inference()', 'profile_output.prof')

p = pstats.Stats('profile_output.prof')
p.sort_stats('cumulative').print_stats(30)  # Show the top 30 functions by cumulative time
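# A couple of optional follow-ups for digging into the saved profile (a sketch,
# assuming you want more than the flat listing above):
#
# pstats can also show which callers account for time in the hot functions:
#   p.print_callers(10)
#
# For an interactive flame-graph-style view, one option (assuming the third-party
# snakeviz package is installed, e.g. via `pip install snakeviz`) is to run from
# the shell:
#   snakeviz profile_output.prof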