mo-456 committed
Commit cdecdab · verified · 1 Parent(s): 0881287

Update app.py

Files changed (1)
  1. app.py +37 -42
app.py CHANGED
@@ -1,55 +1,50 @@
- import os
- import gradio as gr
- from langchain_community.embeddings import HuggingFaceEmbeddings
- from langchain_community.vectorstores import FAISS
  from langchain_community.document_loaders import TextLoader
  from langchain.text_splitter import CharacterTextSplitter
  from langchain.chains import RetrievalQA
- from langchain_huggingface import HuggingFaceEndpoint

- # Load knowledge from Arabic text file
  loader = TextLoader("knowledge.txt", encoding="utf-8")
- docs = loader.load()

- # Split documents into chunks
- text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
- documents = text_splitter.split_documents(docs)

- # Arabic-capable multilingual sentence embeddings
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")

- # Create FAISS vector store
- vectorstore = FAISS.from_documents(documents, embeddings)
-
- # Get token from secret
- token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
-
- # Correct way: pass temperature and max_new_tokens explicitly
- llm = HuggingFaceEndpoint(
-     repo_id="tiiuae/falcon-7b-instruct",
-     huggingfacehub_api_token=token,
-     temperature=0.3,
-     max_new_tokens=256
  )

- # Create the RetrievalQA chain
- qa = RetrievalQA.from_chain_type(
-     llm=llm,
-     chain_type="stuff",
-     retriever=vectorstore.as_retriever()
- )

- # Arabic chatbot function
- def answer_question_arabic(question):
-     return qa.run(question)
-
- # Gradio interface
- iface = gr.Interface(
-     fn=answer_question_arabic,
-     inputs=gr.Textbox(lines=2, placeholder="اكتب سؤالك هنا", label="سؤال"),
-     outputs=gr.Textbox(label="الرد"),
-     title="المساعد الذكي للقطاع الوزاري",
-     description="اكتب أي سؤال متعلق بالخدمات أو الإجراءات داخل القطاع، وسنقدم لك الرد بناءً على قاعدة المعرفة."
- )

- iface.launch()
  from langchain_community.document_loaders import TextLoader
  from langchain.text_splitter import CharacterTextSplitter
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS
  from langchain.chains import RetrievalQA
+ from langchain_community.llms import HuggingFacePipeline
+
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+ import gradio as gr

+ # 1. Load Arabic plain text
  loader = TextLoader("knowledge.txt", encoding="utf-8")
+ documents = loader.load()

+ # 2. Split into chunks
+ text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)
+ docs = text_splitter.split_documents(documents)

+ # 3. Arabic-compatible embeddings
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")

+ # 4. Store chunks in FAISS
+ vectorstore = FAISS.from_documents(docs, embeddings)
+ retriever = vectorstore.as_retriever()
+
+ # 5. Load Arabic-compatible LLM
+ model_name = "remzicam/arabic-llama-cpu"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+
+ generator = pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     max_new_tokens=256,
+     temperature=0.7,
+     do_sample=True,
  )

+ llm = HuggingFacePipeline(pipeline=generator)

+ # 6. Retrieval + QA chain
+ qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
+
+ # 7. Gradio interface
+ def answer_question(question):
+     result = qa_chain.run(question)
+     return result[:1500]

+ gr.Interface(fn=answer_question, inputs="text", outputs="text", title="🤖 الدليل العربي الذكي").launch()
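
For reviewers, a minimal standalone check of the retrieval half of the new app.py; this is a sketch, not part of the commit. It assumes knowledge.txt is present locally and the listed packages are installed; the chunk sizes and embedding model are copied from the diff above, and the Arabic query is a hypothetical example meaning "What services are available?".

# Sketch: verify that Arabic queries retrieve sensible chunks before
# involving the LLM. Mirrors the ingestion steps committed above.
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

documents = TextLoader("knowledge.txt", encoding="utf-8").load()
docs = CharacterTextSplitter(chunk_size=500, chunk_overlap=100).split_documents(documents)
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)
vectorstore = FAISS.from_documents(docs, embeddings)

# Hypothetical test query: "What services are available?"
for hit in vectorstore.similarity_search("ما هي الخدمات المتاحة؟", k=3):
    print(hit.page_content[:120])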