OpenAI ChatGPT RAG Retrieval Augmented Generation Example

parmarjatin4911@gmail.com - Jan 28 - Dev Community


import pandas as pd
import numpy as np
from ast import literal_eval
import openai

def get_embedding(text: str, model="text-embedding-ada-002", **kwargs):
    # Newlines can degrade embedding quality, so collapse them to spaces
    text = text.replace("\n", " ")
    response = openai.embeddings.create(input=[text], model=model, **kwargs)
    return response.data[0].embedding

def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
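
As a quick sanity check of these helpers (assuming OPENAI_API_KEY is set in the environment; the two review strings below are made up purely for illustration):

# Embed two short texts and compare them; values closer to 1.0 mean more similar
emb_a = get_embedding("The pasta was chewy and wholesome.")
emb_b = get_embedding("Great texture for a whole wheat noodle.")
print(cosine_similarity(emb_a, emb_b))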

Load the dataset and convert embeddings

datafile_path = "fine_food_reviews_with_embeddings.csv"
df = pd.read_csv(datafile_path)
df["embedding"] = df.embedding.apply(literal_eval).apply(np.array)

Function to search through the reviews

def search_reviews(df, product_description, n=3, pprint=True):
    product_embedding = get_embedding(
        product_description,
        model="text-embedding-ada-002"
    )
    df["similarity"] = df.embedding.apply(lambda x: cosine_similarity(x, product_embedding))

    results = (
        df.sort_values("similarity", ascending=False)
        .head(n)
        .combined.str.replace("Title: ", "")
        .str.replace("; Content:", ": ")
    )
    if pprint:
        for r in results:
            print(r[:200])
            print()
    return results
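
The retrieval step can be exercised on its own; the query string here is just an illustration:

# Print the three reviews most similar to the query
results = search_reviews(df, "whole wheat pasta", n=3)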

client = openai.OpenAI()

def generate_answer_with_chat(context, question):
    conversation = [
        {"role": "system", "content": "You are a knowledgeable assistant."},
        {"role": "user", "content": f"Context: {context}"},
        {"role": "user", "content": f"Question: {question}"}
    ]

    stream = client.chat.completions.create(
        model="gpt-4",
        messages=conversation,
        stream=True,
    )

    response_text = ""
    for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            response_text += chunk.choices[0].delta.content

    return response_text
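
The generator can also be tested in isolation; the context sentence below is made up purely for illustration:

# Ask a question against a hand-written context string
context = "Reviewers say the whole wheat pasta holds its shape and has a pleasant bite."
print(generate_answer_with_chat(context, "Is the pasta mushy when cooked?"))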

def rag_search(df, query, n=3):
    # Retrieve top N similar reviews
    top_reviews = search_reviews(df, query, n=n, pprint=False)

    # Combine the top reviews into a single context string
    context = " ".join(top_reviews)

    # Generate an answer based on the context using chat API
    answer = generate_answer_with_chat(context, query)

    return answer

Example usage

answer = rag_search(df, "What do people think about the texture of whole wheat pasta?")
print(answer)

Embedding

OpenAI Embedding

Search Embedding

OpenAI Search Embedding