In this lesson, you will use the `GraphCypherQAChain` to add a text-to-Cypher retriever to the LangChain agent.
Text to Cypher
Open the `genai_integration_langchain/cypher_retriever.py` file and review the code:
```python
import os
from dotenv import load_dotenv
load_dotenv()

from langchain_core.documents import Document
from langchain.chat_models import init_chat_model
from langgraph.graph import START, StateGraph
from langchain_core.prompts import PromptTemplate
from typing_extensions import List, TypedDict
from langchain_neo4j import Neo4jGraph

# Initialize the LLM
model = init_chat_model("gpt-4o", model_provider="openai")

# Create a prompt
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}

Answer:"""

prompt = PromptTemplate.from_template(template)

# Define state for application
class State(TypedDict):
    question: str
    context: List[dict]
    answer: str

# Connect to Neo4j
graph = Neo4jGraph(
    url=os.getenv("NEO4J_URI"),
    username=os.getenv("NEO4J_USERNAME"),
    password=os.getenv("NEO4J_PASSWORD"),
)

# Create the Cypher QA chain
# cypher_qa =

# Define functions for each step in the application

# Retrieve context
def retrieve(state: State):
    context = [
        {"data": "None"}
    ]
    return {"context": context}

# Generate the answer based on the question and context
def generate(state: State):
    messages = prompt.invoke({"question": state["question"], "context": state["context"]})
    response = model.invoke(messages)
    return {"answer": response.content}

# Define application steps
workflow = StateGraph(State).add_sequence([retrieve, generate])
workflow.add_edge(START, "retrieve")
app = workflow.compile()

# Run the application
question = "What movies has Tom Hanks acted in?"
response = app.invoke({"question": question})

print("Answer:", response["answer"])
print("Context:", response["context"])
```
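The retrieve → generate flow in this program can be sketched without any LangChain dependencies: each step receives the current state and returns a partial update that is merged back in, which is essentially what `add_sequence` wires up. The sketch below is a simplified stand-in for LangGraph's behaviour, not its implementation.

```python
# Simplified sketch of how the StateGraph pipeline threads state through
# each step: every node receives the full state and returns a partial
# update that is merged back in. This stands in for LangGraph's behaviour
# and does not require any LangChain packages.

def retrieve(state: dict) -> dict:
    # Placeholder retrieval step: the real version queries Neo4j.
    return {"context": [{"data": "None"}]}

def generate(state: dict) -> dict:
    # Placeholder generation step: the real version calls the LLM.
    return {"answer": f"(answer to: {state['question']})"}

def run_pipeline(question: str) -> dict:
    state = {"question": question}
    for step in (retrieve, generate):   # sequential, like add_sequence
        state.update(step(state))       # merge each partial update
    return state

result = run_pipeline("What movies has Tom Hanks acted in?")
print(result["answer"])
```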
You will need to:

- Create a `GraphCypherQAChain` instance.
- Update the `retrieve` function to use the `GraphCypherQAChain` to convert natural language questions into Cypher queries.
Create a new `GraphCypherQAChain` instance:

```python
from langchain_neo4j import GraphCypherQAChain

# Create the Cypher QA chain
cypher_qa = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=model,
    allow_dangerous_requests=True,
    return_direct=True,
)
```
Return Direct

The `return_direct` parameter is set to `True` so that the chain returns the raw result of the Cypher query rather than generating a natural-language answer from it. This is useful when you want to pass the raw data to the agent for further processing or analysis.
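To illustrate the difference, here is a hypothetical comparison of the two shapes the chain's output can take. The rows, titles, and answer text below are illustrative values, not captured from a real run:

```python
# Hypothetical output shapes for GraphCypherQAChain.invoke(); the rows
# and answer text here are illustrative, not from a real database run.

# With return_direct=True, "result" holds the raw Cypher query rows:
raw = {
    "query": "What movies has Tom Hanks acted in?",
    "result": [{"m.title": "Apollo 13"}, {"m.title": "Toy Story"}],
}

# With return_direct=False (the default), the chain runs a second LLM
# step, so "result" would instead be a natural-language answer:
summarised = {
    "query": "What movies has Tom Hanks acted in?",
    "result": "Tom Hanks has acted in Apollo 13 and Toy Story.",
}

# The raw rows are what this application passes on as context.
print(type(raw["result"]), type(summarised["result"]))
```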
Update the `retrieve` function to invoke the `GraphCypherQAChain`:

```python
# Retrieve context
def retrieve(state: State):
    context = cypher_qa.invoke(
        {"query": state["question"]}
    )
    return {"context": context}
```
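Note that the chain raises an exception if the generated Cypher fails or the database is unreachable, so a defensive variant of `retrieve` can be useful. The sketch below uses a stand-in chain object so it runs without Neo4j; only the try/except pattern carries over to the real function.

```python
# Defensive retrieval sketch. FakeChain stands in for the real
# GraphCypherQAChain so this runs without a database; the try/except
# pattern is what carries over to the real retrieve() function.

class FakeChain:
    def invoke(self, inputs: dict) -> dict:
        # Simulate a failed Cypher execution.
        raise ValueError("Cypher syntax error")

cypher_qa = FakeChain()

def retrieve(state: dict) -> dict:
    try:
        context = cypher_qa.invoke({"query": state["question"]})
    except Exception as exc:
        # Fall back to an empty context so the generate step can still
        # answer "I don't know" instead of the whole pipeline crashing.
        context = [{"data": "None", "error": str(exc)}]
    return {"context": context}

print(retrieve({"question": "What movies has Tom Hanks acted in?"}))
```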
Experiment with the retriever by running the program and asking questions about data in the graph:

- What movies has Tom Hanks acted in?
- How much money did the movie Forrest Gump make at the box office?
- Compare the plots of Toy Story and Toy Story 2.
- How long is the movie Clueless?
- What languages are spoken in the movie GoldenEye?
Verbose

Set the `verbose` parameter of the `GraphCypherQAChain` to `True` to see the generated Cypher queries and the full context used to generate the answers.
Click to view the complete code

```python
import os
from dotenv import load_dotenv
load_dotenv()

from langchain_core.documents import Document
from langchain.chat_models import init_chat_model
from langgraph.graph import START, StateGraph
from langchain_core.prompts import PromptTemplate
from typing_extensions import List, TypedDict
from langchain_neo4j import Neo4jGraph
from langchain_neo4j import GraphCypherQAChain

# Initialize the LLM
model = init_chat_model("gpt-4o", model_provider="openai")

# Create a prompt
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}

Answer:"""

prompt = PromptTemplate.from_template(template)

# Define state for application
class State(TypedDict):
    question: str
    context: List[dict]
    answer: str

# Connect to Neo4j
graph = Neo4jGraph(
    url=os.getenv("NEO4J_URI"),
    username=os.getenv("NEO4J_USERNAME"),
    password=os.getenv("NEO4J_PASSWORD"),
)

# Create the Cypher QA chain
cypher_qa = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=model,
    allow_dangerous_requests=True,
    return_direct=True,
)

# Define functions for each step in the application

# Retrieve context
def retrieve(state: State):
    context = cypher_qa.invoke(
        {"query": state["question"]}
    )
    return {"context": context}

# Generate the answer based on the question and context
def generate(state: State):
    messages = prompt.invoke({"question": state["question"], "context": state["context"]})
    response = model.invoke(messages)
    return {"answer": response.content}

# Define application steps
workflow = StateGraph(State).add_sequence([retrieve, generate])
workflow.add_edge(START, "retrieve")
app = workflow.compile()

# Run the application
question = "What movies has Tom Hanks acted in?"
response = app.invoke({"question": question})

print("Answer:", response["answer"])
print("Context:", response["context"])
```
Improve the retriever

Your challenge is to improve the retriever using the techniques you learned in the previous lessons, which could include:

- Providing a custom prompt and specific instructions.
- Including example questions and Cypher queries.
- Using a different LLM for Cypher generation.
- Restricting the schema to provide more focused results.
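As one concrete direction, few-shot examples can be assembled into the Cypher-generation prompt. The sketch below uses plain string formatting to stand in for `PromptTemplate` (which fills the same `{schema}` and `{question}` placeholders); the schema string, example pair, and helper name are all illustrative.

```python
# Sketch of assembling few-shot examples into a Cypher-generation prompt
# using plain string formatting. All names here are illustrative.

EXAMPLES = [
    ("Get movies for an actor?",
     "MATCH (p:Person)-[:ACTED_IN]->(m:Movie) "
     "WHERE p.name = 'Actor Name' RETURN m.title"),
]

def build_cypher_prompt(schema: str, question: str) -> str:
    # Render each (question, cypher) pair as a few-shot example.
    examples = "\n".join(
        f"Question: {q}\nCypher: {c}" for q, c in EXAMPLES
    )
    return (
        "Task: Generate a Cypher statement to query a graph database.\n"
        "Use only the provided schema.\n"
        f"Schema:\n{schema}\n"
        f"Examples:\n{examples}\n"
        f"The question is:\n{question}"
    )

print(build_cypher_prompt("(:Person)-[:ACTED_IN]->(:Movie)",
                          "What movies has Tom Hanks acted in?"))
```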
Here are some examples of more complex questions you can try:

- When was the movie The Abyss released?
- What is the highest grossing movie of all time?
- Can you recommend a Horror movie based on user rating?
- What movies scored about 4 for user rating?
- What are the highest rated movies with more than 100 ratings?
There is no right or wrong solution. You should experiment with different approaches to see how they affect the accuracy and relevance of the generated Cypher queries.
Verbose

When adapting the retriever, you may want to set the `verbose` parameter of the `GraphCypherQAChain` to `True` to see the generated Cypher queries and the full context used to generate the answers.
Click here to see an example solution

This example uses some of the techniques you learned in the previous lessons to improve the retriever:

```python
import os
from dotenv import load_dotenv
load_dotenv()

from langchain_core.documents import Document
from langchain.chat_models import init_chat_model
from langgraph.graph import START, StateGraph
from langchain_core.prompts import PromptTemplate
from typing_extensions import List, TypedDict
from langchain_neo4j import Neo4jGraph
from langchain_neo4j import GraphCypherQAChain

# Initialize the LLM
model = init_chat_model("gpt-4o", model_provider="openai")

# Create a prompt
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}

Answer:"""

prompt = PromptTemplate.from_template(template)

# Define state for application
class State(TypedDict):
    question: str
    context: List[dict]
    answer: str

# Connect to Neo4j
graph = Neo4jGraph(
    url=os.getenv("NEO4J_URI"),
    username=os.getenv("NEO4J_USERNAME"),
    password=os.getenv("NEO4J_PASSWORD"),
)

cypher_template = """Task: Generate a Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
For movie titles that begin with "The", move "The" to the end, for example "The 39 Steps" becomes "39 Steps, The".
Exclude NULL values when finding the highest value of a property.

Schema:
{schema}

Examples:
1. Question: Get user ratings?
   Cypher: MATCH (u:User)-[r:RATED]->(m:Movie) WHERE u.name = "User name" RETURN r.rating AS userRating
2. Question: Get average rating for a movie?
   Cypher: MATCH (m:Movie)<-[r:RATED]-(u:User) WHERE m.title = 'Movie Title' RETURN avg(r.rating) AS userRating
3. Question: Get movies for a genre?
   Cypher: MATCH (m:Movie)-[:IN_GENRE]->(g:Genre) WHERE g.name = 'Genre Name' RETURN m.title AS movieTitle

Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.

The question is:
{question}"""

cypher_prompt = PromptTemplate(
    input_variables=["schema", "question"],
    template=cypher_template
)

# Create the Cypher QA chain
cypher_qa = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=model,
    cypher_prompt=cypher_prompt,
    allow_dangerous_requests=True,
    verbose=True,
)

# Define functions for each step in the application

# Retrieve context
def retrieve(state: State):
    context = cypher_qa.invoke(
        {"query": state["question"]}
    )
    return {"context": context}

# Generate the answer based on the question and context
def generate(state: State):
    messages = prompt.invoke({"question": state["question"], "context": state["context"]})
    response = model.invoke(messages)
    return {"answer": response.content}

# Define application steps
workflow = StateGraph(State).add_sequence([retrieve, generate])
workflow.add_edge(START, "retrieve")
app = workflow.compile()

# Run the application
question = "What is the highest grossing movie of all time?"
response = app.invoke({"question": question})

print("Answer:", response["answer"])
print("Context:", response["context"])
```
Congratulations!
When you are ready, click continue to complete the course.
Lesson Summary
In this lesson, you learned how to create a text-to-Cypher retriever and customize it to improve the quality of the generated Cypher queries.
Congratulations on completing the course!