import PyPDF2
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
import openai # Requires OpenAI API key
# Set your OpenAI API key.
# Replace the placeholder string below with a real key before running.
# NOTE(review): consider loading the key from an environment variable
# instead of committing it to source control.
openai.api_key = "your_openai_api_key"
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages joined with newlines. Pages for which
        ``extract_text()`` returns an empty (falsy) value are skipped, so
        the result is ``""`` when no page yields any text.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # Extract each page exactly once while the file is still open;
        # the filter below reuses the result instead of extracting again.
        page_texts = [page.extract_text() for page in reader.pages]
    return "\n".join(page_text for page_text in page_texts if page_text)
def generate_summary(text, max_chars=4000, model="gpt-4-turbo"):
    """Generate a structured summary of a report using an OpenAI chat model.

    Args:
        text: Full text of the report.
        max_chars: Maximum number of leading characters of ``text`` that are
            included in the prompt; anything beyond is dropped.
        model: Name of the chat model to request.

    Returns:
        The content of the first choice's message in the API response.

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace, since
            there would be nothing to summarize.
    """
    # Fail early instead of spending an API call on an empty report
    # (for example when no text could be extracted from the PDF).
    if not text or not text.strip():
        raise ValueError("Cannot summarize empty report text.")

    prompt = (
        "Summarize the following report in a structured format:\n"
        f"{text[:max_chars]}"
    )
    # NOTE(review): ``openai.ChatCompletion`` looks like the pre-1.0 SDK
    # interface -- confirm the installed openai package still provides it.
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response["choices"][0]["message"]["content"]
def add_summary_to_pdf(original_pdf, summary_text, output_pdf):
    """Append the summary to a copy of the PDF as one or more new pages.

    All pages of ``original_pdf`` are copied, followed by letter-size pages
    containing the heading "Summary of Report" and the summary text. Long
    lines are wrapped and a new page is started when the current one is
    full, so the whole summary ends up in the output.

    The summary pages are rendered in memory; no intermediate file is
    written to disk.

    Args:
        original_pdf: Path (or binary stream) of the source PDF.
        summary_text: Summary to append; newlines separate paragraphs.
        output_pdf: Path of the PDF file to write.
    """
    import io
    import textwrap

    left_margin = 100
    top_y = 750
    bottom_y = 50
    line_height = 20
    # Character budget per line. This is an approximation for the canvas'
    # default font, not an exact width measurement.
    max_chars_per_line = 80

    # Read the original PDF and copy its pages.
    reader = PyPDF2.PdfReader(original_pdf)
    writer = PyPDF2.PdfWriter()
    for page in reader.pages:
        writer.add_page(page)

    # Render the summary into an in-memory PDF.
    summary_buffer = io.BytesIO()
    c = canvas.Canvas(summary_buffer, pagesize=letter)
    c.drawString(left_margin, top_y, "Summary of Report")
    y = top_y - line_height
    for paragraph in summary_text.split("\n"):
        # textwrap.wrap() returns [] for blank input; keep the blank line.
        wrapped = textwrap.wrap(paragraph, width=max_chars_per_line) or [""]
        for line in wrapped:
            if y < bottom_y:
                # Current page is full: finish it and continue at the top.
                c.showPage()
                y = top_y
            c.drawString(left_margin, y, line)
            y -= line_height
    c.save()

    # Merge every summary page, not just the first one.
    summary_buffer.seek(0)
    summary_reader = PyPDF2.PdfReader(summary_buffer)
    for page in summary_reader.pages:
        writer.add_page(page)

    # Save the final PDF while the in-memory summary is still alive.
    with open(output_pdf, "wb") as output_file:
        writer.write(output_file)
    print(f"Summary added. New PDF saved as: {output_pdf}")
# --- USAGE ---
# These statements run at module level, so executing (or importing) this
# file runs the whole pipeline.
# Path of the PDF to read and path of the augmented copy to write.
input_pdf = "your_report.pdf"
output_pdf = "your_report_with_summary.pdf"
# Step 1: Extract text from report
report_text = extract_text_from_pdf(input_pdf)
# Step 2: Generate summary
summary = generate_summary(report_text)
# Step 3: Add summary to the PDF
add_summary_to_pdf(input_pdf, summary, output_pdf)
The same script, inserted into a code block:
import PyPDF2
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
import openai # Requires OpenAI API key
# Set your OpenAI API key.
# Replace the placeholder string below with a real key before running.
# NOTE(review): consider loading the key from an environment variable
# instead of committing it to source control.
openai.api_key = "your_openai_api_key"
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages joined with newlines. Pages for which
        ``extract_text()`` returns an empty (falsy) value are skipped, so
        the result is ``""`` when no page yields any text.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # Extract each page exactly once while the file is still open;
        # the filter below reuses the result instead of extracting again.
        page_texts = [page.extract_text() for page in reader.pages]
    return "\n".join(page_text for page_text in page_texts if page_text)
def generate_summary(text, max_chars=4000, model="gpt-4-turbo"):
    """Generate a structured summary of a report using an OpenAI chat model.

    Args:
        text: Full text of the report.
        max_chars: Maximum number of leading characters of ``text`` that are
            included in the prompt; anything beyond is dropped.
        model: Name of the chat model to request.

    Returns:
        The content of the first choice's message in the API response.

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace, since
            there would be nothing to summarize.
    """
    # Fail early instead of spending an API call on an empty report
    # (for example when no text could be extracted from the PDF).
    if not text or not text.strip():
        raise ValueError("Cannot summarize empty report text.")

    prompt = (
        "Summarize the following report in a structured format:\n"
        f"{text[:max_chars]}"
    )
    # NOTE(review): ``openai.ChatCompletion`` looks like the pre-1.0 SDK
    # interface -- confirm the installed openai package still provides it.
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response["choices"][0]["message"]["content"]
def add_summary_to_pdf(original_pdf, summary_text, output_pdf):
    """Append the summary to a copy of the PDF as one or more new pages.

    All pages of ``original_pdf`` are copied, followed by letter-size pages
    containing the heading "Summary of Report" and the summary text. Long
    lines are wrapped and a new page is started when the current one is
    full, so the whole summary ends up in the output.

    The summary pages are rendered in memory; no intermediate file is
    written to disk.

    Args:
        original_pdf: Path (or binary stream) of the source PDF.
        summary_text: Summary to append; newlines separate paragraphs.
        output_pdf: Path of the PDF file to write.
    """
    import io
    import textwrap

    left_margin = 100
    top_y = 750
    bottom_y = 50
    line_height = 20
    # Character budget per line. This is an approximation for the canvas'
    # default font, not an exact width measurement.
    max_chars_per_line = 80

    # Read the original PDF and copy its pages.
    reader = PyPDF2.PdfReader(original_pdf)
    writer = PyPDF2.PdfWriter()
    for page in reader.pages:
        writer.add_page(page)

    # Render the summary into an in-memory PDF.
    summary_buffer = io.BytesIO()
    c = canvas.Canvas(summary_buffer, pagesize=letter)
    c.drawString(left_margin, top_y, "Summary of Report")
    y = top_y - line_height
    for paragraph in summary_text.split("\n"):
        # textwrap.wrap() returns [] for blank input; keep the blank line.
        wrapped = textwrap.wrap(paragraph, width=max_chars_per_line) or [""]
        for line in wrapped:
            if y < bottom_y:
                # Current page is full: finish it and continue at the top.
                c.showPage()
                y = top_y
            c.drawString(left_margin, y, line)
            y -= line_height
    c.save()

    # Merge every summary page, not just the first one.
    summary_buffer.seek(0)
    summary_reader = PyPDF2.PdfReader(summary_buffer)
    for page in summary_reader.pages:
        writer.add_page(page)

    # Save the final PDF while the in-memory summary is still alive.
    with open(output_pdf, "wb") as output_file:
        writer.write(output_file)
    print(f"Summary added. New PDF saved as: {output_pdf}")
# --- USAGE ---
# These statements run at module level, so executing (or importing) this
# file runs the whole pipeline.
# Path of the PDF to read and path of the augmented copy to write.
input_pdf = "your_report.pdf"
output_pdf = "your_report_with_summary.pdf"
# Step 1: Extract text from report
report_text = extract_text_from_pdf(input_pdf)
# Step 2: Generate summary
summary = generate_summary(report_text)
# Step 3: Add summary to the PDF
add_summary_to_pdf(input_pdf, summary, output_pdf)